mongoDB之分頁查詢 skip() limit()
阿新 • • 發佈:2018-11-15
在研究 mongo 分頁查詢的過程中,發現數據量大了之後,就查詢特別慢;在研究中發現,mongo 處理資料主要依賴記憶體,在 cpu,memory 的佔用率過高的情況下,mongoDB 的效率就會直線下降;所以在對 mongo 操作的過程中,要注意記憶體的消耗;不可做任何佔用大量記憶體的事情;
分頁查詢中,skip 資料量大了之後;效率就直線下降;
我們可以通過多次少量查詢來解決這個問題,你會發現雖然查詢多次,效率依然可觀;
通過上次查詢的結果作為下次查詢的條件:
因為我由時間排序,所以我以時間為查詢條件:
public static FindIterable<Document> findFindIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{ if (mongoCollection == null) { return null; } int sortNum = -1; if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null) { if (sort.get(sortField) == MongoEnums.Sort.ASC.getValue()) { sortNum = 1; } } int skipNum = 1600; //每次查詢條數 int skipStart = 1600; //以此條數為起點 if (skip > skipStart) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss"); SimpleDateFormat sdfData = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.ENGLISH); int f = (int)Math.floor(Double.valueOf(skip)/Double.valueOf(skipNum)); for (int i = 0; i <= f; i++) { FindIterable<Document> findIterableSome = mongoCollection.find(basicDBObject).sort(new BasicDBObject(sortField,sortNum)); if (i<f) { //查詢獲取時間 findIterableSome.limit(skipNum).skip(skipNum-1); Document first = findIterableSome.first(); String date = first.get(sortField).toString(); date = sdf.format(sdfData.parse(date)); basicDBObject.remove(sortField); if (sortNum == -1){ Map<String, Object> queryMap = new HashMap<String, Object>(); if (startTime != null) { queryMap.put("$gt", startTime); } if (StringUtils.isNotEmpty(date)) { Date end = sdf.parse(date); queryMap.put("$lt", end); } basicDBObject.put(sortField, new BasicDBObject(queryMap)); } else { Map<String, Object> queryMap = new HashMap<String, Object>(); if (StringUtils.isNotEmpty(date)) { Date start = sdf.parse(date); queryMap.put("$gt", start); } if (endTime != null) { queryMap.put("$lt", endTime); } //重新設定時間條件 basicDBObject.put(sortField, new BasicDBObject(queryMap)); } } else { //少於skipNum就直接返回查詢結果; if (skip > skipNum){ skip = skip - f*skipNum; } if (isSignlessInteger(skip) == true) { findIterableSome.skip(skip); } return findIterableSome.limit(limit); } } } else { //少於skipStart就直接返回查詢結果 
FindIterable<Document> findIterable = mongoCollection.find(basicDBObject).sort(new BasicDBObject(sortField,sortNum)); if (isSignlessInteger(skip) == true) { findIterable.skip(skip); } return findIterable.limit(limit); } return null; } /** *判斷是否是正整數 */ private static boolean isSignlessInteger(int num) { if (num > 0 && num%1 == 0) { return true; } return false; } /** *定義 DEsc,ASc列舉 */ public class MongoEnums { public enum Sort { DESC(2, "desc"), ASC(1, "asc"); private int value; private String name; Sort(int value, String name) { this.value = value; this.name = name; } public int getValue() { return value; } public String getName() { return name; } } }
注意:一定要注意記憶體的使用,能釋放的記憶體就釋放;注意變數的作用域;儘量不要定義FindIterable<Document>的變數,一般定義一次就好,mongo 已經有很好的處理,沒必要畫蛇添足,不然特別佔用記憶體;
在使用過程中,發現時間有一樣的資料;這樣上面的思路就有問題了,如何處理呢?我們可以通過 time+_id 來排序;優化程式碼如下:
//先後排序!!!!先按sortField排序,如果sortField相同;再按_id排序!
//mongoDB 時間的比較
//mongoDB _id的比較
public static FindIterable<Document> findIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{ if (mongoCollection == null) { return null; } int sortNum = -1; if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null) { if (sort.get(sortField) == MongoEnums.Sort.ASC.getValue()) { sortNum = 1; } } int skipStart = 1600; if (skip > skipStart) { int skipNum = 1600; if (skip >= 40000) { skipNum = 6000; } if (skip >= 1000000) { skipNum = 20000; } int countAll = 0; SimpleDateFormat sdf = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss"); SimpleDateFormat sdfData = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.ENGLISH); int f = (int)Math.floor(Double.valueOf(skip)/Double.valueOf(skipNum)); for (int i = 0; i <= f; i++) { //先後排序!!!!先按sortField排序,如果sortField相同;再按_id排序! Document doc= new Document(); doc.append(sortField, sortNum); doc.append("_id", sortNum); FindIterable<Document> findIterableSome = mongoCollection.find(basicDBObject).sort(doc); if (i<f) { findIterableSome.limit(skipNum).skip(skipNum-1); countAll += skipNum; System.out.println(i+"=i=======countAll====="+countAll); Document first = findIterableSome.first(); if (i == f-1) { System.out.println("=======first+"+first); } findIterableSome = null; Object date = first.get(sortField); String _id = first.get("_id").toString(); System.out.println("=========id======="+_id); basicDBObject.remove(sortField); basicDBObject.remove("_id"); if (sortNum == -1){ Map<String, Object> queryMap = new HashMap<String, Object>(); if (startTime != null) { queryMap.put("$gt", startTime); } if (date != null) { queryMap.put("$lte", date); } //mongoDB 時間的比較 basicDBObject.put(sortField, new BasicDBObject(queryMap)); Map<String, Object> queryMapId = new HashMap<String, Object>(); //mongoDB _id的比較 queryMapId.put("$lt",new ObjectId(_id)); basicDBObject.put("_id",new 
BasicDBObject(queryMapId)); } else { Map<String, Object> queryMap = new HashMap<String, Object>(); if (date != null) { queryMap.put("$gte", date); } if (endTime != null) { queryMap.put("$lt", endTime); } basicDBObject.put(sortField, new BasicDBObject(queryMap)); Map<String, Object> queryMapId = new HashMap<String, Object>(); queryMapId.put("$gt",new ObjectId(_id)); basicDBObject.put("_id",new BasicDBObject(queryMapId)); } } else { System.out.println("========"+mongoCollection.countDocuments(basicDBObject)); if (skip > skipNum){ skip = skip - f*skipNum; } System.out.println("============skip="+skip); if (isSignlessInteger(skip) == true) { findIterableSome.skip(skip); } return findIterableSome.limit(limit); } } } else { FindIterable<Document> findIterable = mongoCollection.find(basicDBObject).sort(new BasicDBObject(sortField,sortNum)); if (isSignlessInteger(skip) == true) { findIterable.skip(skip); } return findIterable.limit(limit); } return null; }
通過測試,最後整合程式碼,如下:
/**
 * Pagination: loads one page of documents and maps each document's {@code content} field to
 * a bean.
 *
 * <p>Reads {@code sort}, {@code sortField}, {@code skip}, {@code limit}, {@code startTime} and
 * {@code endTime} from {@code queryTerms}. Results are ordered by {@code sortField} and then
 * {@code _id}. For offsets above 1600 the skip is split into windows (1600, 6000 or 20000
 * documents, chosen from the size of {@code skip}): the last document of each window is
 * fetched and the next query is restricted to documents that sort strictly after it, i.e.
 * {@code sortField <= v AND (sortField < v OR _id < id)} for descending order, mirrored for
 * ascending order.
 *
 * <p>NOTE(review): the range comparison assumes the matched documents store {@code sortField}
 * as one comparable BSON type (for example always a date) - confirm against the data.
 *
 * @param operatorEnum not used by this method
 * @param queryTerms   query parameters; {@code sort} selects ascending order when it equals
 *                     the ASC value or name of {@link MongoEnums.Sort}, anything else means
 *                     descending; a missing {@code skip} or {@code limit} is read as 0
 * @param <T>          element type the caller expects; every element is created as {@code Bean}
 * @return the page contents; empty when no collection is available
 */
public <T> List<T> selectPage(OperatorEnum operatorEnum, Map<String, Object> queryTerms) {
    Object sort = queryTerms.get("sort");
    String sortField = queryTerms.get("sortField") == null ? null : queryTerms.get("sortField").toString();
    // Build the base query filter.
    BasicDBObject basicDBObject = getBasicDBObject(queryTerms);
    int skip = queryTerms.get("skip") == null ? 0 : Integer.parseInt(queryTerms.get("skip").toString());
    int limit = queryTerms.get("limit") == null ? 0 : Integer.parseInt(queryTerms.get("limit").toString());
    Date startTime = queryTerms.get("startTime") == null ? null : (Date) queryTerms.get("startTime");
    Date endTime = queryTerms.get("endTime") == null ? null : (Date) queryTerms.get("endTime");
    // Obtain the MongoDB collection.
    final MongoCollection<Document> mongoCollection = getCollection("collectionName");
    List<T> list = new ArrayList<>();
    if (mongoCollection == null) {
        return list;
    }

    int sortNum = -1;
    if (sort != null && sortField != null) {
        // Compare text with text: String.equals(<boxed int>) is always false.
        String requested = sort.toString();
        if (requested.equals(String.valueOf(MongoEnums.Sort.ASC.getValue()))
                || requested.equalsIgnoreCase(MongoEnums.Sort.ASC.getName())) {
            sortNum = 1;
        }
    }
    boolean descending = sortNum == -1;

    BasicDBObject base = basicDBObject == null ? new BasicDBObject() : basicDBObject;
    // Order by the sort field first and by _id among equal values, on every path.
    Document order = new Document(sortField, sortNum).append("_id", sortNum);

    int skipStart = 1600;
    int remaining = skip;
    BasicDBObject filter = base;

    if (skip > skipStart) {
        int skipNum = 1600;
        if (skip >= 40000) {
            skipNum = 6000;
        }
        if (skip >= 1000000) {
            skipNum = 20000;
        }
        String beyond = descending ? "$lt" : "$gt";

        while (remaining >= skipNum) {
            // Fetch only the last document of the current window.
            Document boundary = mongoCollection.find(filter).sort(order)
                    .skip(skipNum - 1).limit(1).first();
            Object boundaryValue = boundary == null ? null : boundary.get(sortField);
            if (boundaryValue == null) {
                // Fewer than skipNum documents are left, or the boundary has no sort value to
                // narrow by: the plain skip(remaining) below still yields the right page.
                break;
            }
            Object boundaryId = boundary.get("_id");

            // Inclusive bound on the sort field, plus the caller's time limit on the open side.
            BasicDBObject range = new BasicDBObject(beyond + "e", boundaryValue);
            if (descending && startTime != null) {
                range.put("$gt", startTime);
            }
            if (!descending && endTime != null) {
                range.put("$lt", endTime);
            }

            // Strictly after the boundary: a later sort value, or the same value and a later _id.
            // This replaces counting same-valued documents, which ignored the base filter.
            BasicDBObject strictlyAfter = new BasicDBObject("$or", java.util.Arrays.asList(
                    new BasicDBObject(sortField, new BasicDBObject(beyond, boundaryValue)),
                    new BasicDBObject("_id", new BasicDBObject(beyond, boundaryId))));

            // Build a fresh filter around the untouched base instead of editing it in place.
            filter = new BasicDBObject("$and", java.util.Arrays.asList(
                    base,
                    new BasicDBObject(sortField, range),
                    strictlyAfter));
            remaining -= skipNum;
        }
    }

    FindIterable<Document> page = mongoCollection.find(filter).sort(order);
    if (remaining > 0) {
        page.skip(remaining);
    }
    page.limit(limit);

    // Close the cursor even when parsing a document fails.
    try (MongoCursor<Document> cursor = page.iterator()) {
        while (cursor.hasNext()) {
            Bean bean = JSONObject.parseObject(cursor.next().getString("content"), Bean.class);
            // The caller chooses T; the elements are always Bean instances.
            @SuppressWarnings("unchecked")
            T item = (T) bean;
            list.add(item);
        }
    }
    return list;
}
mongo 官網:
https://docs.mongodb.com/manual/reference/operator/aggregation/skip/
檢視命令執行時長;
*.explain("executionStats")