PHP 高级搜索语法解析库

github

First add the library to your project:

composer require netgen/query-translator:^1.0

Demo 如下:

xx

app/Service/Query/ESQueryGenerator.php

<?php
/**
 * Created by PhpStorm.
 * User: admin
 * Date: 2020/1/14
 * Time: 11:22
 */

namespace App\Service\Query;

use App\Models\ES\Search;
use Illuminate\Support\Arr;

class ESQueryGenerator
{

    /**
     * Maximum length, in bytes, of the user text placed into a query_string query.
     * Longer input is truncated before escaping.
     * @var int
     */
    const QUERY_STRING_LIMIT = 200;

    /**
     * Build an exact-match `term` clause.
     *
     * @param string $field field name
     * @param mixed $val value to match
     * @return array
     */
    public static function termQuery($field, $val)
    {
        return [
            'term' => [$field => $val],
        ];
    }

    /**
     * Build an exact-match `term` clause wrapped in a `nested` query.
     * The term is addressed as "<path>.<field>".
     *
     * @param string $path nested path
     * @param string $field field name relative to $path
     * @param mixed $val value to match
     * @return array
     */
    public static function nestedTermQuery($path, $field, $val)
    {
        return [
            'nested' => [
                'path'  => $path,
                'query' => [
                    'term' => ["{$path}.{$field}" => $val],
                ],
            ],
        ];
    }

    /**
     * Build a wildcard `query_string` clause from user text.
     *
     * When $fields is empty only the `query` key is emitted; otherwise the
     * clause is restricted to those fields and uses the AND default operator.
     *
     * @param string $val raw user text
     * @param string|array $fields field name(s) to search; empty for none
     * @return array
     */
    public static function queryStringQuery($val, $fields = [])
    {
        $val = self::prepareQueryString($val);

        if (!$fields) {
            return [
                'query_string' => [
                    'query' => "{$val}",
                ],
            ];
        }

        return [
            'query_string' => [
                'query'            => "{$val}",
                'fields'           => (array)$fields,
                "default_operator" => "AND",
            ],
        ];
    }

    /**
     * Build a wildcard `query_string` clause wrapped in a `nested` query.
     *
     * When $field is empty only the `query` key is emitted inside the nested
     * query; otherwise the clause targets "<path>.<field>" with the AND
     * default operator.
     *
     * @param string $path nested path
     * @param string $val raw user text
     * @param string $field field name relative to $path; empty for none
     * @return array
     */
    public static function nestedQueryStringQuery($path, $val, $field = '')
    {
        $val = self::prepareQueryString($val);

        if (!$field) {
            return [
                'nested' => [
                    'path'  => $path,
                    'query' => [
                        'query_string' => [
                            'query' => "{$val}",
                        ],
                    ],
                ],
            ];
        }

        return [
            'nested' => [
                'path'  => $path,
                'query' => [
                    'query_string' => [
                        'query'            => "{$val}",
                        'fields'           => [$path . "." . $field],
                        "default_operator" => "AND",
                    ],
                ],
            ],
        ];

    }

    /**
     * Truncate, escape and wildcard-wrap user text for a query_string query.
     *
     * @param string $val raw user text
     * @return string
     */
    private static function prepareQueryString($val)
    {
        $val = (string)$val;

        // Enforce the byte limit without splitting a multi-byte character:
        // mb_strcut() moves the cut back to a character boundary, whereas
        // substr() could leave a malformed UTF-8 sequence at the end.
        if (strlen($val) > self::QUERY_STRING_LIMIT) {
            $val = mb_strcut($val, 0, self::QUERY_STRING_LIMIT, 'UTF-8');
        }

        // Escape characters listed here with a backslash.
        $val = addcslashes($val, ":/?&()'");

        // Replace spaces with the single-character wildcard (works around
        // English phrases containing spaces returning no results).
        $val = str_replace(' ', '?', $val);

        // Wrap in wildcards unless the caller already supplied one.
        if (strpos($val, '*') === false) {
            $val = "*{$val}*";
        }

        return $val;
    }

    /**
     * Recursively translate a rendered syntax tree into query clauses.
     *
     * Clauses are appended to $query (by reference) and the same value is
     * returned. Only AND, OR, GROUP, WORD and PHRASE nodes are translated;
     * any other node type is skipped.
     *
     * @param array|null $query clause list being built (modified in place)
     * @param array $query_arr (sub)tree as produced by SyntaxTreeRenderer
     * @param array $field_map mapping from query field names to real field names
     * @param string $OP operator of the enclosing node, passed through on recursion
     * @return mixed
     */
    public static function makeQuery(&$query, $query_arr, $field_map, $OP = 'DEFAULT')
    {

        foreach ($query_arr as $type => $child_query_arr) {

            if (is_numeric($type)) {

                // Numeric index: plain list of child nodes, descend into each.
                self::makeQuery($query, $child_query_arr, $field_map, $OP);

            } elseif (in_array($type, ['OR', 'AND'], true)) {

                // Translate the logical operator into its bool occurrence type.
                $op = $type === 'OR' ? 'should' : 'must';

                // Collect the operands first so that an operator whose operands
                // all produced nothing does not emit a bool clause holding null.
                $children = [];
                self::makeQuery($children, $child_query_arr, $field_map, $op);
                if ($children) {
                    $query[] = ['bool' => [$op => $children]];
                }

            } elseif ($type === "GROUP") {

                // Descend into every member of the group, not just the first.
                self::makeQuery($query, $child_query_arr['value'], $field_map, $OP);

            } elseif (in_array($type, ['WORD', 'PHRASE'], true)) {

                // Leaf node: emit the final clause.
                $query[] = self::makeLeafQuery($type, $child_query_arr, $field_map);
            }

        }

        return $query;
    }

    /**
     * Build the clause for a single WORD or PHRASE node.
     *
     * @param string $type 'WORD' or 'PHRASE'
     * @param array $term node payload with 'field' and 'value' keys
     * @param array $field_map mapping from query field names to real field names
     * @return array
     */
    private static function makeLeafQuery($type, $term, $field_map)
    {
        // '~' marks a term without an explicit field.
        if ($term['field'] === '~') {
            return self::queryStringQuery($term['value']);
        }

        $mapped = Arr::get($field_map, $term['field'], $term['field']);

        // A mapped name of the form "path@field" denotes a nested field.
        $nested = strpos($mapped, '@') !== false;
        if ($nested) {
            list($path, $field) = explode('@', $mapped);
        } else {
            $path  = null;
            $field = $mapped;
        }

        // PHRASE -> exact term match; WORD -> wildcard query_string.
        if ($type === 'PHRASE') {
            return $nested
                ? self::nestedTermQuery($path, $field, $term['value'])
                : self::termQuery($field, $term['value']);
        }

        return $nested
            ? self::nestedQueryStringQuery($path, $term['value'], $field)
            : self::queryStringQuery($term['value'], $field);
    }

    /**
     * Build the search request array for a query expression.
     *
     * With an empty $q the body contains only the sort. Otherwise the
     * expression is parsed and translated; several top-level clauses are
     * combined with bool/must (matching the AND default operator used by the
     * query_string clauses), and the `query` key is omitted when nothing in
     * the expression could be translated.
     *
     * @param string $q query expression
     * @param array $field_map mapping from query field names to real field names
     * @param string $index
     * @param string $type
     * @param int $size page size
     * @param int $page 1-based page number
     * @param string $order_by
     * @param string $order_type
     * @return array
     */
    public static function getQuery(
        $q,
        $field_map,
        $index,
        $type,
        $size,
        $page = 1,
        $order_by = 'updated_at',
        $order_type = 'desc'
    ) {
        $request_query = [
            'index' => $index,
            'type'  => $type,
            'size'  => $size,
            'from'  => max(($page - 1) * $size, 0),
            'body'  => [],
        ];

        if ($q) {
            $syntax_tree = ParserService::syntaxTree($q)['QUERY'];

            $clauses = [];
            self::makeQuery($clauses, $syntax_tree, $field_map);

            if (count($clauses) === 1) {
                $request_query['body']['query'] = $clauses[0];
            } elseif ($clauses) {
                // e.g. "a b": keep every top-level clause instead of only the first.
                $request_query['body']['query'] = ['bool' => ['must' => $clauses]];
            }
        }

        $request_query['body']['sort'] = [[$order_by => ['order' => $order_type]]];

        return $request_query;
    }

    /**
     * Format a raw search response as a paginated result.
     *
     * `hits.total` is accepted either as a plain number or as an array with a
     * `value` key. Each hit's `_source` is returned with `index_id` (from
     * `_id`) and `index_index` (from `_index`) added.
     *
     * @param array $src raw search response
     * @param int $page current page number
     * @param int $page_size page size
     * @return array
     */
    public static function paginate($src, $page, $page_size)
    {
        $total = Arr::get($src, 'hits.total', 0);
        if (is_array($total)) {
            $total = Arr::get($total, 'value', 0);
        }

        if (!$total) {
            return [
                'current_page' => (int)1,
                'total_page'   => (int)1,
                'total'        => (int)0,
                'list'         => [],
            ];
        }

        $list = [];
        $hits = Arr::get($src, 'hits.hits');
        if ($hits) {
            foreach ($hits as $hit) {
                $source                = Arr::get($hit, '_source');
                $source['index_id']    = Arr::get($hit, '_id');
                $source['index_index'] = Arr::get($hit, '_index');
                $list[]                = $source;
            }
        }

        // Guard against a zero/negative page size causing a division by zero.
        $per_page = $page_size > 0 ? $page_size : 1;

        return [
            'current_page' => (int)$page,
            'total_page'   => (int)ceil(max($total, 1) / $per_page),
            'total'        => (int)$total,
            'list'         => $list,
        ];
    }

}

app/Service/Query/ParserService.php

<?php
/**
 * Created by PhpStorm.
 * User: admin
 * Date: 2020/1/13
 * Time: 10:56
 */

namespace App\Service\Query;

use App\Models\ES\Search;
use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Languages\Galach\TokenExtractor\Full as FullTokenExtractor;
use QueryTranslator\Languages\Galach\Parser;
use QueryTranslator\Languages\Galach\Generators;

class ParserService
{
    const defaultFieldName = 'error';

    /**
     * Parse a query expression and render its syntax tree as a nested array.
     *
     * @param string $queryString query expression in Galach syntax
     * @param array $fieldMap accepted for signature parity with queryString(); not used here
     * @return array
     */
    public static function syntaxTree($queryString, $fieldMap = Search::FIELD_MAP)
    {
        return SyntaxTreeRenderer::render(self::buildSyntaxTree($queryString));
    }

    /**
     * Parse a query expression and generate the target query string from it.
     *
     * @param string $queryString query expression in Galach syntax
     * @param array $fieldMap domain-to-field map handed to the Group, Phrase and Word visitors
     * @return string
     */
    public static function queryString($queryString, $fieldMap = Search::FIELD_MAP)
    {
        $tree = self::buildSyntaxTree($queryString);

        // Corrections applied by the parser, if needed, are available on
        // $tree->corrections.

        // One visitor per node kind in the syntax tree; the list order is kept
        // exactly as the generator receives it.
        $fallbackField = self::defaultFieldName;
        $nodeVisitors  = [
            new Generators\Lucene\Common\Prohibited(),
            new Generators\Lucene\Common\Group($fieldMap, $fallbackField),
            new Generators\Lucene\Common\Mandatory(),
            new Generators\Lucene\Common\LogicalAnd(),
            new Generators\Lucene\Common\LogicalNot(),
            new Generators\Lucene\Common\LogicalOr(),
            new Generators\Lucene\Common\Phrase($fieldMap, $fallbackField),
            new Generators\Lucene\Common\Query(),
            new Generators\Lucene\Common\Tag('tag_ms'),
            new Generators\Lucene\Common\User('user_s'),
            new Generators\Lucene\QueryString\Word($fieldMap, $fallbackField),
        ];

        // Aggregate the visitors and run the query-string generator over the tree.
        $queryStringGenerator = new Generators\QueryString(
            new Generators\Common\Aggregate($nodeVisitors)
        );

        return $queryStringGenerator->generate($tree);
    }

    /**
     * Tokenize and parse a query expression.
     *
     * The Full token extractor is used, which provides the complete Galach
     * syntax.
     *
     * @param string $queryString
     * @return \QueryTranslator\Values\SyntaxTree
     */
    private static function buildSyntaxTree($queryString)
    {
        // Query string -> token sequence.
        $lexer  = new Tokenizer(new FullTokenExtractor());
        $tokens = $lexer->tokenize($queryString);

        // Token sequence -> syntax tree.
        $galachParser = new Parser();

        return $galachParser->parse($tokens);
    }

}

app/Service/Query/SyntaxTreeRenderer.php

<?php
/**
 * Created by PhpStorm.
 * User: admin
 * Date: 2020/1/13
 * Time: 15:17
 */

namespace App\Service\Query;

use QueryTranslator\Languages\Galach\Values\Node\Group;
use QueryTranslator\Languages\Galach\Values\Node\Query;
use QueryTranslator\Languages\Galach\Values\Node\LogicalAnd;
use QueryTranslator\Languages\Galach\Values\Node\LogicalOr;
use QueryTranslator\Languages\Galach\Values\Node\LogicalNot;
use QueryTranslator\Languages\Galach\Values\Node\Mandatory;
use QueryTranslator\Languages\Galach\Values\Node\Prohibited;
use QueryTranslator\Languages\Galach\Values\Node\Term;
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
use QueryTranslator\Languages\Galach\Values\Token\Tag;
use QueryTranslator\Languages\Galach\Values\Token\User;
use QueryTranslator\Languages\Galach\Values\Token\Word;
use QueryTranslator\Values\Node;
use QueryTranslator\Values\SyntaxTree;

class SyntaxTreeRenderer
{
    /**
     * Render the given syntax tree as a nested array, starting at its root node.
     *
     * @param \QueryTranslator\Values\SyntaxTree $syntaxTree
     *
     * @return array
     */
    public static function render(SyntaxTree $syntaxTree)
    {
        $root = $syntaxTree->rootNode;

        return self::convert($root);
    }

    /**
     * Map a node to the label used as its key in the rendered array.
     *
     * @param \QueryTranslator\Values\Node $node
     *
     * @return string
     *
     * @throws \RuntimeException when the node type is not recognized
     */
    private static function getNodeName(Node $node)
    {
        // Term nodes are labelled after the kind of token they carry.
        if ($node instanceof Term) {
            $carried = $node->token;

            if ($carried instanceof Phrase) {
                return 'PHRASE';
            }
            if ($carried instanceof Tag) {
                return 'TAG';
            }
            if ($carried instanceof User) {
                return 'USER';
            }
            if ($carried instanceof Word) {
                return 'WORD';
            }
        }

        if ($node instanceof LogicalAnd) {
            return 'AND';
        }
        if ($node instanceof LogicalOr) {
            return 'OR';
        }
        if ($node instanceof LogicalNot) {
            return 'NOT';
        }
        if ($node instanceof Mandatory) {
            return 'MANDATORY';
        }
        if ($node instanceof Prohibited) {
            return 'PROHIBITED';
        }
        if ($node instanceof Group) {
            return 'GROUP';
        }
        if ($node instanceof Query) {
            return 'QUERY';
        }

        throw new \RuntimeException('Did not recognize given node');
    }

    /**
     * Convert a node (and, recursively, its sub-nodes) to a single-key array
     * of the form [label => payload].
     *
     * @param \QueryTranslator\Values\Node $node
     *
     * @return array
     */
    private static function convert(Node $node)
    {
        if ($node instanceof Term) {
            $payload = self::getTermSubObjects($node);
        } else {
            $payload = [];
            foreach ($node->getNodes() as $child) {
                $payload[] = self::convert($child);
            }
        }

        // Groups additionally expose the domain of their left token;
        // '~' stands for "no domain".
        if ($node instanceof Group) {
            $domain  = $node->tokenLeft->domain ?: '~';
            $payload = [
                'field' => $domain,
                'value' => $payload,
            ];
        }

        $label = self::getNodeName($node);

        return [$label => $payload];
    }

    /**
     * Extract the field/value pair carried by a term's token.
     * An empty domain or marker is reported as '~'.
     *
     * @param \QueryTranslator\Languages\Galach\Values\Node\Term $term
     *
     * @return array
     *
     * @throws \RuntimeException when the token type is not recognized
     */
    private static function getTermSubObjects(Term $term)
    {
        $token = $term->token;

        if ($token instanceof Phrase) {
            /** @var \QueryTranslator\Languages\Galach\Values\Token\Phrase $token */
            return ['field' => $token->domain ?: '~', 'value' => $token->phrase];
        }

        if ($token instanceof Tag) {
            /** @var \QueryTranslator\Languages\Galach\Values\Token\Tag $token */
            return ['field' => $token->marker ?: '~', 'value' => $token->tag];
        }

        if ($token instanceof User) {
            /** @var \QueryTranslator\Languages\Galach\Values\Token\User $token */
            return ['field' => $token->marker ?: '~', 'value' => $token->user];
        }

        if ($token instanceof Word) {
            /** @var \QueryTranslator\Languages\Galach\Values\Token\Word $token */
            return ['field' => $token->domain ?: '~', 'value' => $token->word];
        }

        throw new \RuntimeException('Did not recognize given node');
    }
}