1. 程式人生 > >MongoDB4:聚合函式group和mapReduce

MongoDB4:聚合函式group和mapReduce

1.group測試:

// Insert one sample order document (id 123, mount 500) into the "order" collection.
db.order.insert({id:123,mount:500})
yooo:PRIMARY> db.order.find()
{ "_id" : ObjectId("5b387105c0298213b42c35de"), "id" : 123, "mount" : 500 }
{ "_id" : ObjectId("5b387113c0298213b42c35df"), "id" : 123, "mount" : 250 }
{ "_id" : ObjectId("5b387124c0298213b42c35e0"), "id" : 212, "mount" : 200 }
{ "_id" : ObjectId("5b387131c0298213b42c35e1"), "id" : 123, "mount" : 300 }
{ "_id" : ObjectId("5b387a62c0298213b42c35e2"), "id" : 212, "mount" : 275 }

yooo:PRIMARY> db.order.group(
...                 {
...                  key: {id:1},   ----以id欄位進行分組
...                  reduce: function( doc, result ) {
...                               result.total += doc.mount;  ---每分組統計mount
...                               result.count++; ---每分組統計文件個數         
...                               },
...                   initial: { total : 0,count: 0 } ,
...                   finalize: function(result) {
...                                result.avg=result.total/result.count ---每分組的總mount/分組文件個數
...                                }
...                  }
...                  )
[
{ "id" : 123, "total" : 1050, "count" : 3, "avg" : 350 }
{ "id" : 212, "total" : 475, "count" : 2, "avg" : 237.5 }
]
實現了SQL語句同樣的功能:
SELECT id ,SUM(mount) as total,COUNT(id) as count, (SUM(mount)/COUNT(id)) as avg FROM order GROUP BY id

2.mapReduce測試:
https://docs.mongodb.com/manual/tutorial/map-reduce-examples/
>分片叢集環境需要用mapReduce不能使用group:
The db.collection.group() method does not work with sharded clusters.
① map:為對映函式,裡面會呼叫emit(key,value),集合會按照指定的key進行對映分組
② reduce:為簡化函式,會對map分組後的資料進行分組簡化,在reduce(key,value)中
                  的key就是emit中的key,value則為同一個key下所有emit出來的value所組成的陣列


③ mapReduce:是最後執行的函數了,引數為map,reduce和其他可選引數

1>分組並統計mount總和
// Map step: emit one (id, mount) pair for the current order document.
var mf = function () {
    var orderId = this.id;
    var amount = this.mount;
    emit(orderId, amount);
};

// Reduce step: add up every mount value collected for the same id.
var rf = function (key, valuemount) {
    var total = Array.sum(valuemount);
    return total;
};
// Run map-reduce over "order" with mf/rf and write the result to the "mrout" collection.
db.order.mapReduce(mf,rf, { out: "mrout" } )

yooo:PRIMARY> db.order.mapReduce(mf,rf, { out: "mrout" } )
{
 "result" : "mrout",  ---輸出結果到mrout的集合中
 "timeMillis" : 58,
 "counts" : {
  "input" : 5,    ----輸入的文件個數
  "emit" : 5,     ----emit函式呼叫次數
  "reduce" : 2,   ----reduce函式呼叫次數
  "output" : 2    ----返回輸出文件個數
 },
 "ok" : 1,
 "operationTime" : Timestamp(1530441013, 4),
 "$clusterTime" : {
  "clusterTime" : Timestamp(1530441013, 4),
  "signature" : {
   "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
   "keyId" : NumberLong(0)
  }
 }
}
yooo:PRIMARY> db.mrout.find()
{ "_id" : 123, "value" : 1050 }
{ "_id" : 212, "value" : 475 }
2>分組並統計每組文件個數:
下面的寫法結果都是一樣:
// Map step: emit a 1 for the current document, keyed by its id.
var mf2 = function () {
    emit(this.id, 1);
};

// Reduce step: sum the numbers collected for one id to get the document count.
var rf2 = function (key, value) {
    return value.reduce(function (count, x) {
        return count + x;
    }, 0);
};
// Run map-reduce over "order" with mf2/rf2 and write the result to the "mrout2" collection.
db.order.mapReduce(mf2,rf2, { out: "mrout2" } )

// Map step: emit a 1 for the current document, keyed by its id.
var mf3 = function () {
    var groupKey = this.id;
    emit(groupKey, 1);
};

// Reduce step: let the shell's Array.sum helper total the numbers for one id.
var rf3 = function (key, value) {
    var count = Array.sum(value);
    return count;
};
// Run map-reduce over "order" with mf3/rf3 and write the result to the "mrout3" collection.
db.order.mapReduce(mf3,rf3, { out: "mrout3" } )

// Map step: emit {num: 1} for the current document, keyed by its id.
// The emitted value has the same shape as what rf7 returns. MongoDB may feed
// rf7's own output back into rf7 (re-reduce), and it skips rf7 entirely for a
// key with a single value, so both shapes must match.
var mf7 = function () {
    emit(this.id, { num: 1 });
};

// Reduce step: add up the "num" field of every value collected for one id.
// Works for freshly emitted values and for partial results alike.
var rf7 = function (key, value) {
    var result = { num: 0 };
    value.forEach(function (x) {
        result.num += x.num;
    });
    return result;
};
// Run map-reduce over "order" with mf7/rf7 and write the result to the "mrout7" collection.
db.order.mapReduce(mf7,rf7, { out: "mrout7" } )
yooo:PRIMARY> db.mrout7.find()
{ "_id" : 123, "value" : { "num" : 3 } }
{ "_id" : 212, "value" : { "num" : 2 } }
3>以id欄位分組,並統計每組mount總和和每組的文件個數:
// Map step: emit the current document's mount together with a count of 1, keyed by id.
var mf0 = function () {
    var entry = { mount: this.mount, count: 1 };
    emit(this.id, entry);
};
map函式的結果是:
第一組:{key:123,values:[{mount:500,count:1},{mount:250,count:1},{mount:300,count:1}]}
第二組:{key:212,values:[{mount:200,count:1},{mount:275,count:1}]}

// Reduce step: accumulate the mount total and the document count for one id.
//
// key   - the id the values were grouped under (unused here).
// value - array of values for that id. Each element is either a freshly
//         emitted {mount, count} or a partial result {total, num} that an
//         earlier call of this function returned.
// Returns {total, num}.
//
// MongoDB may call the reduce function more than once for the same key and
// pass previous outputs back in, so both shapes must be accepted. Reading
// only mount/count from a partial result would yield NaN. (The tidier
// alternative is to have the map function emit {total, num} directly.)
var rf0 = function (key, value) {
    var result = { total: 0, num: 0 };
    for (var i = 0; i < value.length; i++) {
        var item = value[i];
        var isPartial = item.total !== undefined;
        result.total += isPartial ? item.total : item.mount;
        result.num += isPartial ? item.num : item.count;
    }
    return result;
};

// Finalize step: attach the average mount per document to a group's result.
//
// key    - the id of the group (unused here).
// result - normally the reducer's {total, num}. For an id with only one
//          document MongoDB skips the reduce function, so the raw emitted
//          {mount, count} arrives here instead.
// Returns {total, num, avg} in both cases, so single-document groups no
// longer end up with avg = NaN and a different field layout.
var finalff0 = function (key, result) {
    var reduced = result.total !== undefined;
    var total = reduced ? result.total : result.mount;
    var num = reduced ? result.num : result.count;
    return { total: total, num: num, avg: total / num };
};
// Run map-reduce over "order" with mf0/rf0, apply finalff0 to each group's value,
// and write the result to the "mrout0" collection.
db.order.mapReduce(mf0,rf0, { out: "mrout0",finalize:finalff0 } )

yooo:PRIMARY> db.mrout0.find()
{ "_id" : 123, "value" : { "total" : 1050, "num" : 3, "avg" : 350 } }
{ "_id" : 212, "value" : { "total" : 475, "num" : 2, "avg" : 237.5 } }
至此mapReduce輸出了和上面group一樣的分組統計結果。