1. 程式人生 > >mongoDB之分頁查詢 skip() limit()

mongoDB之分頁查詢 skip() limit()

在研究 mongo 分頁查詢的過程中,發現資料量大了之後,查詢就特別慢。進一步研究發現,mongo 處理資料主要依賴記憶體,在 CPU、記憶體佔用率過高的情況下,mongoDB 的效率會直線下降。所以在操作 mongo 的過程中,要注意記憶體的消耗,不要做任何佔用大量記憶體的事情。

分頁查詢中,skip 跳過的資料量一大,效率就直線下降(skip 必須從結果集開頭逐條掃過所有被跳過的文件)。

我們可以通過多次少量查詢來解決這個問題,你會發現雖然查詢多次,效率依然可觀;

通過上次查詢的結果作為下次查詢的條件:

因為我是按時間排序的,所以以時間作為查詢條件:

/**
 * Returns one page of documents while avoiding a single large {@code skip()}.
 *
 * <p>Offsets up to 1600 are served with a plain {@code skip(skip).limit(limit)}.
 * Larger offsets are reached in steps of 1600 documents: each step reads only the
 * last document of the step and then narrows the filter on {@code sortField} to the
 * values strictly after it, so the final query only needs a small residual skip.
 *
 * <p>Known limitation: the boundary is excluded with a strict {@code $lt}/{@code $gt}
 * on {@code sortField} alone, so documents sharing the boundary value are dropped.
 * This variant is only exact when {@code sortField} values are unique.
 *
 * @param mongoCollection collection to query; {@code null} yields {@code null}
 * @param basicDBObject   base filter; it is copied, never modified
 * @param sort            map from field name to direction code; the direction is ascending
 *                        only when the entry for {@code sortField} equals
 *                        {@code MongoEnums.Sort.ASC.getValue()}, otherwise descending
 * @param sortField       field to sort and seek on
 * @param skip            number of documents to skip
 * @param limit           maximum number of documents to return (passed to {@code limit()})
 * @param startTime       exclusive lower bound re-applied on {@code sortField} for descending
 *                        deep pages; may be {@code null}
 * @param endTime         exclusive upper bound re-applied on {@code sortField} for ascending
 *                        deep pages; may be {@code null}
 * @return the configured iterable, or {@code null} when {@code mongoCollection} is {@code null}
 * @throws Exception kept for signature compatibility
 */
public static FindIterable<Document> findFindIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{
	if (mongoCollection == null) {
		return null;
	}
	int sortNum = -1;
	if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null) {
		if (sort.get(sortField) == MongoEnums.Sort.ASC.getValue()) {
			sortNum = 1;
		}
	}
	final int skipNum = 1600;   // documents stepped over by each seek query
	final int skipStart = 1600; // offsets up to this value use a plain skip()
	BasicDBObject order = new BasicDBObject(sortField, sortNum);

	if (skip <= skipStart) {
		FindIterable<Document> findIterable = mongoCollection.find(basicDBObject).sort(order);
		if (skip > 0) {
			findIterable.skip(skip);
		}
		return findIterable.limit(limit);
	}

	// Work on a copy so the caller's filter is not rewritten as a side effect.
	BasicDBObject query = new BasicDBObject(basicDBObject);
	int remaining = skip;
	int steps = skip / skipNum;
	for (int i = 0; i < steps; i++) {
		// Fetch only the last document of this step; it marks where the next step starts.
		Document boundary = mongoCollection.find(query).sort(order).skip(skipNum - 1).first();
		Object boundaryValue = boundary == null ? null : boundary.get(sortField);
		if (boundaryValue == null) {
			// Fewer documents than expected (or no sort value to seek on):
			// stop seeking and let the residual skip below handle the rest.
			break;
		}
		// Use the stored value as-is. Formatting it to text and parsing it back would
		// truncate milliseconds and move the boundary.
		BasicDBObject range = new BasicDBObject();
		if (sortNum == -1) {
			if (startTime != null) {
				range.put("$gt", startTime);
			}
			range.put("$lt", boundaryValue);
		} else {
			range.put("$gt", boundaryValue);
			if (endTime != null) {
				range.put("$lt", endTime);
			}
		}
		query.put(sortField, range);
		remaining -= skipNum;
	}

	FindIterable<Document> page = mongoCollection.find(query).sort(order);
	if (remaining > 0) {
		page.skip(remaining);
	}
	return page.limit(limit);
}

/**
 * Tells whether {@code num} is a positive integer.
 *
 * @param num value to check
 * @return {@code true} when {@code num} is greater than zero, {@code false} otherwise
 */
private static boolean isSignlessInteger(int num) {
	// An int is always a whole number, so only the sign has to be checked.
	return num > 0;
}

/**
 * Holder for the enumerations used by the MongoDB query helpers.
 */
public class MongoEnums {

	/**
	 * Sort direction, carrying a numeric code and a lower-case label.
	 */
	public enum Sort {
		DESC(2, "desc"),
		ASC(1, "asc");

		/** Numeric code of the direction. */
		private final int value;
		/** Textual label of the direction. */
		private final String name;

		Sort(int code, String label) {
			value = code;
			name = label;
		}

		/** @return the numeric code of this direction */
		public int getValue() {
			return this.value;
		}

		/** @return the textual label of this direction */
		public String getName() {
			return this.name;
		}
	}
}

注意:一定要注意記憶體的使用,能釋放的記憶體就釋放;注意變數的作用域;儘量不要定義FindIterable<Document>的變數,一般定義一次就好,mongo 已經有很好的處理,沒必要畫蛇添足,不然特別佔用記憶體;

在使用過程中,發現時間有一樣的,這時上面的思路就有問題了(用 $lt/$gt 過濾邊界時間,會把時間相同但尚未翻到的資料一併排除)。如何處理呢?我們可以通過 time + _id 來排序,優化程式碼如下:

   //先後排序!!!!先按sortField排序,如果sortField相同;再按_id排序!

   //mongoDB 時間的比較

    //mongoDB _id的比較
 

	/**
	 * Returns one page of documents ordered by {@code sortField} and then {@code _id},
	 * reaching deep offsets by seeking instead of one large {@code skip()}.
	 *
	 * <p>Offsets up to 1600 use a plain {@code skip()}. Larger offsets are reached in steps
	 * (1600 documents, 6000 from offset 40000, 20000 from offset 1000000). Each step reads the
	 * last document of the step and restricts the next query to documents that sort strictly
	 * after it: either {@code sortField} is past the boundary value, or {@code sortField}
	 * equals the boundary value and {@code _id} is past the boundary id. The {@code _id} bound
	 * therefore only applies to documents sharing the boundary value, so documents with equal
	 * sort values are neither lost nor repeated.
	 *
	 * <p>The same compound sort is used for shallow and deep offsets so that consecutive pages
	 * agree on the order of documents with equal {@code sortField} values.
	 *
	 * @param mongoCollection collection to query; {@code null} yields {@code null}
	 * @param basicDBObject   base filter; it is combined with the seek condition through
	 *                        {@code $and} and is never modified
	 * @param sort            map from field name to direction code; the direction is ascending
	 *                        only when the entry for {@code sortField} equals
	 *                        {@code MongoEnums.Sort.ASC.getValue()}, otherwise descending
	 * @param sortField       primary sort field
	 * @param skip            number of documents to skip
	 * @param limit           maximum number of documents to return (passed to {@code limit()})
	 * @param startTime       exclusive lower bound added on {@code sortField} for descending
	 *                        deep pages; may be {@code null}
	 * @param endTime         exclusive upper bound added on {@code sortField} for ascending
	 *                        deep pages; may be {@code null}
	 * @return the configured iterable, or {@code null} when {@code mongoCollection} is {@code null}
	 * @throws Exception kept for signature compatibility
	 */
	public static FindIterable<Document> findIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{
		if (mongoCollection == null) {
			return null;
		}
		int sortNum = -1;
		if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null) {
			if (sort.get(sortField) == MongoEnums.Sort.ASC.getValue()) {
				sortNum = 1;
			}
		}
		// Sort by sortField first; documents with equal sortField are ordered by _id.
		BasicDBObject order = new BasicDBObject(sortField, sortNum).append("_id", sortNum);
		BasicDBObject query = basicDBObject;
		int remaining = skip;

		final int skipStart = 1600;
		if (skip > skipStart) {
			int skipNum = 1600;
			if (skip >= 40000) {
				skipNum = 6000;
			}
			if (skip >= 1000000) {
				skipNum = 20000;
			}
			// "After the boundary" means smaller when descending, larger when ascending.
			String seekOp = sortNum == -1 ? "$lt" : "$gt";
			// Outer time bound that deep pages re-apply on the sort field.
			BasicDBObject outerBound = null;
			if (sortNum == -1 && startTime != null) {
				outerBound = new BasicDBObject(sortField, new BasicDBObject("$gt", startTime));
			} else if (sortNum == 1 && endTime != null) {
				outerBound = new BasicDBObject(sortField, new BasicDBObject("$lt", endTime));
			}

			int steps = skip / skipNum;
			for (int i = 0; i < steps; i++) {
				// Fetch only the last document of this step.
				Document boundary = mongoCollection.find(query).sort(order).skip(skipNum - 1).first();
				if (boundary == null) {
					// Fewer documents than expected: the residual skip below yields an empty page.
					break;
				}
				Object boundaryValue = boundary.get(sortField);
				Object boundaryId = boundary.get("_id");
				if (boundaryValue == null || boundaryId == null) {
					// Nothing to seek on; fall back to a plain skip for the rest.
					break;
				}
				// Sort-field comparison OR (same sort value AND _id comparison).
				BasicDBObject afterBoundary = new BasicDBObject("$or", java.util.Arrays.asList(
						new BasicDBObject(sortField, new BasicDBObject(seekOp, boundaryValue)),
						new BasicDBObject(sortField, boundaryValue)
								.append("_id", new BasicDBObject(seekOp, boundaryId))));
				java.util.List<BasicDBObject> clauses = new java.util.ArrayList<BasicDBObject>();
				clauses.add(basicDBObject);
				if (outerBound != null) {
					clauses.add(outerBound);
				}
				clauses.add(afterBoundary);
				query = new BasicDBObject("$and", clauses);
				remaining -= skipNum;
			}
		}

		FindIterable<Document> page = mongoCollection.find(query).sort(order);
		if (remaining > 0) {
			page.skip(remaining);
		}
		return page.limit(limit);
	}

通過測試,最後整合程式碼,如下:

/**
 * Loads one page of documents from the collection "collectionName" and converts each
 * document's "content" string into a {@code Bean}.
 *
 * <p>Reads these entries from {@code queryTerms}: "sort", "sortField", "skip", "limit",
 * "startTime" and "endTime". The base filter comes from {@code getBasicDBObject(queryTerms)}.
 * Results are ordered by {@code sortField} and then {@code _id}. The direction is ascending
 * only when "sort" renders as the numeric code of {@code MongoEnums.Sort.ASC}, otherwise
 * descending.
 *
 * <p>Offsets up to 1600 use a plain {@code skip()}. Larger offsets are reached in steps
 * (1600 documents, 6000 from offset 40000, 20000 from offset 1000000). Each step reads the
 * last document of the step and restricts the next query to documents that sort strictly
 * after it: {@code sortField} past the boundary value, or equal to it with {@code _id} past
 * the boundary id. This needs no extra count query and cannot produce a negative skip.
 *
 * @param operatorEnum not used by this method
 * @param queryTerms   paging and filter parameters, see above
 * @param <T>          element type expected by the caller; elements are {@code Bean}
 *                     instances cast unchecked to {@code T}
 * @return the page contents; empty when the collection is unavailable or nothing matches
 */
@SuppressWarnings("unchecked")
public <T> List<T> selectPage(OperatorEnum operatorEnum, Map<String, Object> queryTerms) {
	Object sort = queryTerms.get("sort");
	String sortField = queryTerms.get("sortField") == null ? null : queryTerms.get("sortField").toString();
	// Base filter built from the request parameters.
	BasicDBObject basicDBObject = getBasicDBObject(queryTerms);
	int skip = queryTerms.get("skip") == null ? 0 : Integer.valueOf(queryTerms.get("skip").toString());
	int limit = queryTerms.get("limit") == null ? 0 : Integer.valueOf(queryTerms.get("limit").toString());
	Date startTime = queryTerms.get("startTime") == null ? null : (Date)queryTerms.get("startTime");
	Date endTime = queryTerms.get("endTime") == null ? null : (Date)queryTerms.get("endTime");
	// Obtain the MongoDB collection.
	final MongoCollection<Document> mongoCollection = getCollection("collectionName");
	List<T> list = new ArrayList<>();
	if (mongoCollection == null) {
		return list;
	}

	int sortNum = -1;
	// Compare text with text: String.equals(Integer) is always false.
	if (sort != null && sortField != null
			&& String.valueOf(MongoEnums.Sort.ASC.getValue()).equals(sort.toString())) {
		sortNum = 1;
	}

	// Sort by sortField first; documents with equal sortField are ordered by _id.
	BasicDBObject order = new BasicDBObject(sortField, sortNum).append("_id", sortNum);
	BasicDBObject query = basicDBObject;
	int remaining = skip;

	final int skipStart = 1600;
	if (skip > skipStart) {
		int skipNum = 1600;
		if (skip >= 40000) {
			skipNum = 6000;
		}
		if (skip >= 1000000) {
			skipNum = 20000;
		}
		// "After the boundary" means smaller when descending, larger when ascending.
		String seekOp = sortNum == -1 ? "$lt" : "$gt";
		// Outer time bound that deep pages re-apply on the sort field.
		BasicDBObject outerBound = null;
		if (sortNum == -1 && startTime != null) {
			outerBound = new BasicDBObject(sortField, new BasicDBObject("$gt", startTime));
		} else if (sortNum == 1 && endTime != null) {
			outerBound = new BasicDBObject(sortField, new BasicDBObject("$lt", endTime));
		}

		int steps = skip / skipNum;
		for (int i = 0; i < steps; i++) {
			// Fetch only the last document of this step.
			Document boundary = mongoCollection.find(query).sort(order).skip(skipNum - 1).first();
			if (boundary == null) {
				// Fewer documents than expected: the residual skip below yields an empty page.
				break;
			}
			Object boundaryValue = boundary.get(sortField);
			Object boundaryId = boundary.get("_id");
			if (boundaryValue == null || boundaryId == null) {
				// Nothing to seek on; fall back to a plain skip for the rest.
				break;
			}
			List<BasicDBObject> afterBoundary = new ArrayList<>();
			afterBoundary.add(new BasicDBObject(sortField, new BasicDBObject(seekOp, boundaryValue)));
			afterBoundary.add(new BasicDBObject(sortField, boundaryValue)
					.append("_id", new BasicDBObject(seekOp, boundaryId)));

			List<BasicDBObject> clauses = new ArrayList<>();
			clauses.add(basicDBObject);
			if (outerBound != null) {
				clauses.add(outerBound);
			}
			clauses.add(new BasicDBObject("$or", afterBoundary));
			query = new BasicDBObject("$and", clauses);
			remaining -= skipNum;
		}
	}

	FindIterable<Document> page = mongoCollection.find(query).sort(order);
	if (remaining > 0) {
		page.skip(remaining);
	}
	page.limit(limit);

	// Close the cursor even when parsing a document fails.
	try (MongoCursor<Document> iterator = page.iterator()) {
		while (iterator.hasNext()) {
			Bean bean = JSONObject.parseObject(iterator.next().getString("content"), Bean.class);
			list.add((T) bean);
		}
	}
	return list;
}

mongo 官網:

https://docs.mongodb.com/manual/reference/operator/aggregation/skip/

檢視命令執行時長;

*.explain("executionStats")