1. 程式人生 > >ES 搜索(5)—— 常用查詢語句

ES 搜索(5)—— 常用查詢語句

terms ica word ner lsp pretty 個數 iter fix

1. match_all query
#匹配所有文檔,得分全為1.
curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d '{ "query": { "match_all": {} } }'
boost改變得分
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "match_all": { "boost" : 1.2 }
    }
}
不匹配任何文檔
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "match_none": {}
    }
}
2.全文查詢 full text query 執行查詢之前先分析查詢字符串 通常是文本字段查詢

match_query

執行全文查詢的標準查詢,包括模糊匹配和短語或近似查詢

GET /_search
{
    "query": {
        "match" : {
            "message" : "this is a test"
        }
    }
}

match_phrase

與match查詢類似,但用於匹配精確短語或單詞近似匹配

GET /_search
{
    "query": {
        "match_phrase" : {
            "message" : "this is a test"
        }
    }
}
#指定分詞器
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
"query": { "match_phrase" : { "message" : { "query" : "this is a test", "analyzer" : "my_analyzer" } } } }

match_phrase_prefix

與match_phrase查詢一樣,但在最後一個單詞上做了前綴(通配符)搜索。

GET /_search
{
    "query": {
        "match_phrase_prefix" : {
            "message" : "quick brown f"
        }
    }
}
#max_expansions控制最後一個詞可擴展出的後綴(詞項)數量,默認為50;如設為10,表示最後一個詞最多擴展為10個詞項(並非限制返回的結果數)
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "match_phrase_prefix" : {
            "message" : {
                "query" : "quick brown f",
                "max_expansions" : 10
            }
        }
    }
}

multi_match query

多字段查詢

如果沒有指定query field,查詢時按照index.query.default_field默認字段查詢。

一次查詢最多支持1024個字段

multi_match的查詢類型包括:

best_fields 查詢與任何字段匹配的文檔 _score來自最佳字段的文檔

默認type

most_fields 查詢與任何字段匹配的文檔 _score來自每個字段的文檔

cross_fields 不能使用模糊查詢

phrase 在每個字段上執行match_phrase查詢,_score來自最佳字段

phrase_prefix 在每個字段上執行match_phrase_prefix查詢,_score來自最佳字段

#query String  和 query field
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "multi_match" : { "query": "this is a test", "fields": [ "subject", "message" ] } } }
#包含通配符的多字段查詢 query fields 為:title,first_name,last_name
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "multi_match" : { "query": "Will Smith", "fields": [ "title", "*_name" ] } } }
#subject字段的重要性是message字段的三倍
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "multi_match" : { "query" : "this is a test", "fields" : [ "subject^3", "message" ] } } }
#通常best_fields的得分為最佳匹配字段的得分;如果指定了tie_breaker,則再加上 tie_breaker * 其他所有匹配字段的_score。
也可以指定其他參數:analyzer, boost, operator, minimum_should_match, fuzziness,
lenient, prefix_length, max_expansions, rewrite, zero_terms_query, cutoff_frequency,
auto_generate_synonyms_phrase_query and fuzzy_transpositions
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "multi_match" : { "query": "brown fox", "type": "best_fields", "fields": [ "subject", "message" ], "tie_breaker": 0.3 } } }
#相當於執行

curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d '
{
    "query": {
        "dis_max": {
            "queries": [
                { "match": { "subject": "brown fox" }},
                { "match": { "message": "brown fox" }}
            ],
            "tie_breaker": 0.3
        }
    }
}'

#operator為and要求所有查詢詞都出現在同一個字段中,文檔才算匹配
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "multi_match" : { "query": "Will Smith", "type": "best_fields", "fields": [ "first_name", "last_name" ], "operator": "and" } } }
#相當於執行
(+first_name:will +first_name:smith) | (+last_name:will +last_name:smith)
#得分計算:每個match子句的分數相加,再除以match子句的個數
curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d '{ "query": { "multi_match" : { "query": "quick brown fox", "type": "most_fields", "fields": [ "title", "title.original", "title.shingles" ] } } }'
#相當於執行
curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d '{ "query": { "bool": { "should": [ { "match": { "title": "quick brown fox" }}, { "match": { "title.original": "quick brown fox" }}, { "match": { "title.shingles": "quick brown fox" }} ] } } }'
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
  "query": {
    "multi_match" : {
      "query":      "Will Smith",
      "type":       "cross_fields",
      "fields":     [ "first_name", "last_name" ],
      "operator":   "and"
    }
  }
}
#相當於
+(first_name:will  last_name:will)
+(first_name:smith last_name:smith)

common terms query 將查詢詞分為兩種:

1.重要的 (more important low frequency)

2.不重要的(less important high frequency)通常為stopwords

minimum_should_match: 2 //默認只作用於低頻詞;也可用low_freq、high_freq分別為低頻詞和高頻詞指定
GET /_search
{
    "query": {
        "common": {
            "body": {
                "query": "this is bonsai cool",
                "cutoff_frequency": 0.001
            }
        }
    }
}
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "common": {
            "body": {
                "query": "nelly the elephant not as a cartoon",
                "cutoff_frequency": 0.001,
                "minimum_should_match": {
                    "low_freq" : 2,
                    "high_freq" : 3
                }
            }
        }
    }
}
#相當於
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "bool": {
            "must": {
                "bool": {
                    "should": [
                    { "term": { "body": "nelly"}},
                    { "term": { "body": "elephant"}},
                    { "term": { "body": "cartoon"}}
                    ],
                    "minimum_should_match": 2
                }
            },
            "should": {
                "bool": {
                    "should": [
                    { "term": { "body": "the"}},
                    { "term": { "body": "not"}},
                    { "term": { "body": "as"}},
                    { "term": { "body": "a"}}
                    ],
                    "minimum_should_match": 3
                }
            }
        }
    }
}
query_string
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "query_string" : {
            "default_field" : "content",
            "query" : "this AND that OR thus"
        }
    }
}
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "query_string" : {
            "fields" : ["content", "name"],
            "query" : "this AND that"
        }
    }
}
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "query_string" : {
            "fields" : ["city.*"],
            "query" : "this AND that OR thus"
        }
    }
}

3.term query 按照存儲在倒排索引中的確切詞進行操作,常用於數字、日期和枚舉等結構化數據,而不是全文本字段

term query

查詢特定字段中包含的特定術語的文檔

boost 給定一個更高的相關性分值

term查詢確切匹配倒排索引
match相關匹配
保存數據時,
映射類型為text會按分詞器分詞結果存儲在倒排索引中
映射類型為keyword 不按分詞器分詞,整個詞直接存儲在倒排索引中。
例如:
https://www.elastic.co/guide/en/elasticsearch/reference/6.1/query-dsl-term-query.html
curl -XPOST ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
  "query": {
    "term" : { "user" : "Kimchy" } 
  }
}
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
  "query": {
    "bool": {
      "should": [
        {
          "term": {
            "status": {
              "value": "urgent",
              "boost": 2.0 
            }
          }
        },
        {
          "term": {
            "status": "normal" 
          }
        }
      ]
    }
  }
}

terms query

查詢符合任何一個term的文檔

過濾機制:

可指定參數:index、type、id、path、routing

curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "terms" : { "user" : ["kimchy", "elasticsearch"]}
    }
}
curl -XPUT ‘localhost:9200/users/user/2?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "followers" : ["1", "3"]
}
curl -XPUT ‘localhost:9200/tweets/tweet/1?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "user" : "1"
}
curl -XGET ‘localhost:9200/tweets/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query" : {
        "terms" : {
            "user" : {
                "index" : "users",
                "type" : "user",
                "id" : "2",
                "path" : "followers"
            }
        }
    }
}

terms_set query
curl -XPUT ‘localhost:9200/my-index?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "mappings": {
        "doc": {
            "properties": {
                "required_matches": {
                    "type": "long"
                }
            }
        }
    }
}
curl -XPUT ‘localhost:9200/my-index/doc/1?refresh&pretty‘ -H ‘Content-Type: application/json‘ -d{
    "codes": ["ghi", "jkl"],
    "required_matches": 2
}
curl -XPUT ‘localhost:9200/my-index/doc/2?refresh&pretty‘ -H ‘Content-Type: application/json‘ -d{
    "codes": ["def", "ghi"],
    "required_matches": 2
}
#可通過minimum_should_match_field參數指定一個字段,該字段的值即為文檔至少需要匹配的term個數
curl -XGET ‘localhost:9200/my-index/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "terms_set": {
            "codes" : {
                "terms" : ["abc", "def", "ghi"],
                "minimum_should_match_field": "required_matches"
            }
        }
    }
}
#也可通過minimum_should_match_script用腳本計算至少需要匹配的term個數
curl -XGET ‘localhost:9200/my-index/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "terms_set": { "codes" : { "terms" : ["abc", "def", "ghi"], "minimum_should_match_script": { "source": "Math.min(params.num_terms, doc[‘required_matches‘].value)" } } } } }
4. range query

TermRangeQuery 針對string字段

NumericRangeQuery 針對數字、日期字段

#gte大於等於 gt大於 lte小於等於 lt小於
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{ "query": { "range" : { "age" : { "gte" : 10, "lte" : 20, "boost" : 2.0 } } } }

查詢日期範圍時,可以使用date math表達式

日期格式

時區問題

curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "range" : {
            "date" : {
                "gte" : "now-1d/d",
                "lt" :  "now/d"
            }
        }
    }
}
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "range" : {
            "born" : {
                "gte": "01/01/2012",
                "lte": "2013",
                "format": "dd/MM/yyyy||yyyy"
            }
        }
    }
}
#2015-01-01 00:00:00將轉為2014-12-31T23:00:00 UTC ;now不受時區影響
curl -XGET ‘localhost:9200/_search?pretty‘ -H ‘Content-Type: application/json‘ -d{
    "query": {
        "range" : {
            "timestamp" : {
                "gte": "2015-01-01 00:00:00", 
                "lte": "now", 
                "time_zone": "+01:00"
            }
        }
    }
}

ES 搜索(5)—— 常用查詢語句