File size: 4,880 Bytes
b24d496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from elasticsearch import Elasticsearch
import os

# Index that searches run against; an empty string when ES_INDEX_NAME is unset.
es_index_name = os.environ.get("ES_INDEX_NAME", "")

# Shared client for this module. It is created only when ES_URL holds a
# non-empty value; otherwise it stays None.
es = Elasticsearch(os.environ["ES_URL"]) if os.environ.get("ES_URL") else None

# Boost table for the function_score query. Each entry pairs the terms that
# are matched against short_company_name with the weight used for documents
# matching any of those terms. Order is kept as originally written.
_BOOST_GROUPS = (
    (("норильский", "норникель", "Норникель", "нн", "никель"), 0.38),
    # NOTE: the term "кольская" used to be in this group and is disabled.
    (("гмк",), 0.4),
    # NOTE: the term "комбинат" used to be in this group and is disabled.
    (("транспорт", "спутник", "сфера", "сервисы", "авиа", "аэропорт"), 2.5),
    # NOTE: the terms "коропоративный" and "университет" used to be in this
    # group and are disabled.
    (("пао",), 1.45),
)


def _weight_function(terms, weight):
    """Build one function_score entry: *weight* filtered by a match on any of *terms*."""
    return {
        "filter": {
            "bool": {
                "should": [
                    {"match": {"short_company_name": term}} for term in terms
                ]
            }
        },
        "weight": weight,
    }


def _build_search_body(query, size):
    """Assemble the request body sent to Elasticsearch for *query*."""
    return {
        # Score threshold passed to Elasticsearch as min_score.
        "min_score": 3.5,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": ["short_company_name"],
                        "analyzer": "custom_russian_analyzer",
                    }
                },
                "functions": [
                    _weight_function(terms, weight)
                    for terms, weight in _BOOST_GROUPS
                ],
                "boost_mode": "multiply",
            }
        },
        "size": size,
        "highlight": {
            "fields": {
                "short_company_name": {}
            },
            "pre_tags": ["<b>"],
            "post_tags": ["</b>"],
            "fragment_size": 150,
        },
    }


def _format_hit(hit):
    """Convert one raw Elasticsearch hit into the dict returned to callers."""
    source = hit["_source"]
    return {
        "company": source.get("short_company_name", "Название компании не указано"),
        "files_path": source.get("folder_path", "Путь к файлам не найден"),
        "highlights": hit.get("highlight", {}),
        "score": hit["_score"],  # relevance score reported for the hit
    }


def search_companies(query, size=100):
    """Search the configured index for companies whose short name matches *query*.

    Args:
        query: Text matched against the ``short_company_name`` field.
        size: Number of hits requested from Elasticsearch (sent as ``size``).

    Returns:
        A list with one dict per returned hit, in response order. Each dict
        has the keys ``company``, ``files_path``, ``highlights`` and
        ``score``. The list is empty when no client is configured (the
        module-level ``es`` is ``None``) or when the response has no hits.

    Raises:
        Whatever ``es.search`` raises; errors are not caught here.
    """
    if es is None:
        return []

    # Run the search.
    res = es.search(index=es_index_name, body=_build_search_body(query, size))

    # Iterate the hit list directly instead of first checking
    # res["hits"]["total"]["value"]: that field is a plain integer or absent
    # on some server versions/settings, and an empty hit list already yields
    # an empty result.
    return [_format_hit(hit) for hit in res["hits"]["hits"]]

def find_nmd_docs(user_query, maps):
    """Collect document names for the companies that match *user_query*.

    Runs ``search_companies`` (up to 100 hits) and, for every hit whose
    ``files_path`` is a key of *maps*, takes the names stored under that key.
    Hit order and the order of names within each entry are kept.

    Args:
        user_query: Query text forwarded to ``search_companies``.
        maps: Mapping from a files path to an iterable of names.

    Returns:
        A list of the collected names, each with its last five characters
        removed (presumably a file extension; confirm against the data).
    """
    return [
        doc_name[:-5]
        for hit in search_companies(user_query, size=100)
        if hit["files_path"] in maps
        for doc_name in maps[hit["files_path"]]
    ]