ES 父子文档

目录

为什么一定要为子文档指定 routing

The routing value is mandatory because parent and child documents must be indexed on the same shard.

A document is routed to a particular shard in an index using the following formula:

shard_num = hash(_routing) % num_primary_shards

The default value used for _routing is the document’s _id.

所以在写入子文档的时候,要使用父文档的 _id 作为 routing 的值,以保证子文档和父文档被路由到同一个分片中。


索引定义
{
  "mappings": {
    "test_20191127": {
      "properties": {
        "created_at": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss"
        },
        "ip": {
          "type": "keyword",
          "normalizer": "standard_lowercase"
        },
        "cve_id": {
          "type": "keyword",
          "normalizer": "standard_lowercase"
        },
        "apply": {
          "type": "keyword",
          "normalizer": "standard_lowercase"
        },
        "join_field": {
          "type": "join",
          "relations": {
            "ip": [
              "cve",
              "apply"
            ]
          }
        }
      }
    }
  },
  "settings": {
    "index": {
      "search": {
        "slowlog": {
          "threshold": {
            "fetch": {
              "warn": "1s",
              "trace": "200ms",
              "debug": "500ms",
              "info": "800ms"
            },
            "query": {
              "warn": "10s",
              "trace": "500ms",
              "debug": "2s",
              "info": "5s"
            }
          }
        }
      },
      "number_of_shards": "10",
      "max_result_window": "10000000",
      "mapper": {
        "dynamic": "false"
      },
      "unassigned": {
        "node_left": {
          "delayed_timeout": "1m"
        }
      },
      "analysis": {
        "normalizer": {
          "standard_lowercase": {
            "filter": [
              "lowercase"
            ],
            "type": "custom"
          }
        }
      },
      "number_of_replicas": "0"
    }
  }
}

写入数据
    // Build the Elasticsearch client in one fluent chain; the single host entry
    // carries the user/pass credentials alongside the address.
    $client = \Elasticsearch\ClientBuilder::create()
        ->setHosts([['host' => "10.249.43.93", 'user' => 'xxxx', 'pass' => 'xxxx']])
        ->build();

    // Every document in this demo is written to the same index and mapping type.
    $target = ['index' => 'test_20191127', 'type' => 'test_20191127'];

    /*
     * Parameters of $client->index() used below:
     *   index   - (string) name of the index (required)
     *   type    - (string) type of the document (required)
     *   id      - (string) specific document ID; only passed for the parent
     *   routing - (string) specific routing value; only passed for the children
     *   body    - (array)  the document itself
     */
    foreach (range(1, 10) as $n) {
        // Parent document: join_field holds the parent relation name "ip".
        $parentDoc = [
            'ip'         => $n,
            'join_field' => 'ip',
        ];

        // Child documents: join_field names the child relation and references
        // the ID of the parent they belong to.
        $cveDoc = [
            'ip'         => $n,
            'join_field' => ['name' => 'cve', 'parent' => $n],
            'cve_id'     => 'CVE-' . $n,
        ];
        $applyDoc = [
            'ip'         => $n,
            'join_field' => ['name' => 'apply', 'parent' => $n],
            'apply'      => 'APPLY-' . $n,
        ];

        // Insert the parent under an explicit ID so the children can point at it.
        $client->index($target + ['id' => $n, 'body' => $parentDoc]);

        // Insert the children with routing set to the parent ID, so that they
        // are stored on the same shard as their parent. No 'id' is passed here.
        foreach ([$cveDoc, $applyDoc] as $childDoc) {
            $client->index($target + ['routing' => $n, 'body' => $childDoc]);
        }
    }

数据概览
{
    "took": 0,
    "timed_out": false,
    "_shards": {
        "total": 10,
        "successful": 10,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 30,
        "max_score": 1.0,
        "hits": [
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "5",
                "_score": 1.0,
                "_source": {
                    "ip": 5,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "UX7Hq24B5GVBR06e807I",
                "_score": 1.0,
                "_routing": "5",
                "_source": {
                    "ip": 5,
                    "join_field": {
                        "name": "cve",
                        "parent": 5
                    },
                    "cve_id": "CVE-5"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "Un7Hq24B5GVBR06e807Y",
                "_score": 1.0,
                "_routing": "5",
                "_source": {
                    "ip": 5,
                    "join_field": {
                        "name": "apply",
                        "parent": 5
                    },
                    "apply": "APPLY-5"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "10",
                "_score": 1.0,
                "_source": {
                    "ip": 10,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "W37Hq24B5GVBR06e9E5v",
                "_score": 1.0,
                "_routing": "10",
                "_source": {
                    "ip": 10,
                    "join_field": {
                        "name": "cve",
                        "parent": 10
                    },
                    "cve_id": "CVE-10"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "XH7Hq24B5GVBR06e9E54",
                "_score": 1.0,
                "_routing": "10",
                "_source": {
                    "ip": 10,
                    "join_field": {
                        "name": "apply",
                        "parent": 10
                    },
                    "apply": "APPLY-10"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "1",
                "_score": 1.0,
                "_source": {
                    "ip": 1,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "SX7Hq24B5GVBR06e804m",
                "_score": 1.0,
                "_routing": "1",
                "_source": {
                    "ip": 1,
                    "join_field": {
                        "name": "cve",
                        "parent": 1
                    },
                    "cve_id": "CVE-1"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "Sn7Hq24B5GVBR06e8041",
                "_score": 1.0,
                "_routing": "1",
                "_source": {
                    "ip": 1,
                    "join_field": {
                        "name": "apply",
                        "parent": 1
                    },
                    "apply": "APPLY-1"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "3",
                "_score": 1.0,
                "_source": {
                    "ip": 3,
                    "join_field": "ip"
                }
            }
        ]
    }
}

查询所有父文档

这里通过 has_child 查询来获取父文档:只有至少拥有一个 apply 类型子文档的父文档才会被返回,因此要确保每个父文档都拥有这类子文档,才能查出所有父文档。查询语法如下:

{
    "query": {
        "has_child" : {
            "type" : "apply",
            "query" : {
                "match_all":{}
            }
        }
    }
}

结果如下:

{
    "took": 0,
    "timed_out": false,
    "_shards": {
        "total": 10,
        "successful": 10,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 10,
        "max_score": 1.0,
        "hits": [
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "5",
                "_score": 1.0,
                "_source": {
                    "ip": 5,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "10",
                "_score": 1.0,
                "_source": {
                    "ip": 10,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "1",
                "_score": 1.0,
                "_source": {
                    "ip": 1,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "3",
                "_score": 1.0,
                "_source": {
                    "ip": 3,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "8",
                "_score": 1.0,
                "_source": {
                    "ip": 8,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "9",
                "_score": 1.0,
                "_source": {
                    "ip": 9,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "2",
                "_score": 1.0,
                "_source": {
                    "ip": 2,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "4",
                "_score": 1.0,
                "_source": {
                    "ip": 4,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "6",
                "_score": 1.0,
                "_source": {
                    "ip": 6,
                    "join_field": "ip"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "7",
                "_score": 1.0,
                "_source": {
                    "ip": 7,
                    "join_field": "ip"
                }
            }
        ]
    }
}

根据子文档过滤得出父文档

查询语法:

{
    "query": {
        "has_child" : {
            "type" : "apply",
            "query" : {
                "term":{
                    "apply":"APPLY-5"
                }
            }
        }
    }
}

结果:

{
    "took": 0,
    "timed_out": false,
    "_shards": {
        "total": 10,
        "successful": 10,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 1,
        "max_score": 1.0,
        "hits": [
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "5",
                "_score": 1.0,
                "_source": {
                    "ip": 5,
                    "join_field": "ip"
                }
            }
        ]
    }
}

根据父文档过滤得出子文档

查询语法如下:

{
    "size":100,
    "query": {
        "has_parent" : {
            "parent_type" : "ip",
            "query" : {
               "term":{
                "ip":"5"
               }
            }
        }
    }
}

结果如下:

{
    "took": 0,
    "timed_out": false,
    "_shards": {
        "total": 10,
        "successful": 10,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 2,
        "max_score": 1.0,
        "hits": [
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "6n73q24B5GVBR06e7k_w",
                "_score": 1.0,
                "_routing": "5",
                "_source": {
                    "type": "cve",
                    "ip": 5,
                    "join_field": {
                        "name": "cve",
                        "parent": 5
                    },
                    "cve_id": "CVE-5"
                }
            },
            {
                "_index": "test_20191127",
                "_type": "test_20191127",
                "_id": "6373q24B5GVBR06e708C",
                "_score": 1.0,
                "_routing": "5",
                "_source": {
                    "type": "apply",
                    "ip": 5,
                    "join_field": {
                        "name": "apply",
                        "parent": 5
                    },
                    "apply": "APPLY-5"
                }
            }
        ]
    }
}