Spaces:
Sleeping
Sleeping
File size: 4,880 Bytes
b24d496 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
from elasticsearch import Elasticsearch
import os
# Index name passed to Elasticsearch searches; an empty string when the
# ES_INDEX_NAME environment variable is unset.
es_index_name = os.environ.get("ES_INDEX_NAME", "")

# The client is created only when ES_URL holds a non-empty value; otherwise
# `es` stays None.
es = Elasticsearch(os.environ["ES_URL"]) if os.environ.get("ES_URL") else None
def _name_boost(terms, weight):
    """Build one ``function_score`` function entry.

    The entry applies ``weight`` to documents whose ``short_company_name``
    matches at least one of ``terms`` (a ``bool``/``should`` list of
    ``match`` clauses).
    """
    return {
        "filter": {
            "bool": {
                "should": [
                    {"match": {"short_company_name": term}} for term in terms
                ]
            }
        },
        "weight": weight,
    }


def _build_search_body(query, size):
    """Assemble the Elasticsearch request body used by ``search_companies``."""
    return {
        # Hits scoring below this threshold are not returned.
        "min_score": 3.5,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": ["short_company_name"],
                        "analyzer": "custom_russian_analyzer",
                    }
                },
                # No "score_mode" is set, so Elasticsearch's default rule for
                # combining several matching functions applies.
                "functions": [
                    _name_boost(
                        ["норильский", "норникель", "Норникель", "нн", "никель"],
                        0.38,
                    ),
                    # Disabled term: "кольская"
                    _name_boost(["гмк"], 0.4),
                    # Disabled term: "комбинат"
                    _name_boost(
                        [
                            "транспорт",
                            "спутник",
                            "сфера",
                            "сервисы",
                            "авиа",
                            "аэропорт",
                        ],
                        2.5,
                    ),
                    # Disabled terms: "коропоративный", "университет"
                    _name_boost(["пао"], 1.45),
                ],
                # The text-match score is multiplied by the function score, so
                # weights below 1 demote a document and weights above 1
                # promote it.
                "boost_mode": "multiply",
            }
        },
        "size": size,
        "highlight": {
            "fields": {
                "short_company_name": {},
            },
            "pre_tags": ["<b>"],
            "post_tags": ["</b>"],
            "fragment_size": 150,
        },
    }


def search_companies(query, size=100):
    """Search the company index by short company name.

    Args:
        query: Text matched against the ``short_company_name`` field.
        size: Maximum number of hits requested from Elasticsearch.

    Returns:
        A list with one dict per hit, in the order Elasticsearch returned
        them. Each dict has the keys ``"company"``, ``"files_path"``,
        ``"highlights"`` and ``"score"``. The list is empty when no client is
        configured (``es`` is ``None``) or when nothing matched.
    """
    if es is None:
        return []

    # Run the search.
    res = es.search(index=es_index_name, body=_build_search_body(query, size))

    # Convert the hits. The hit list is iterated directly instead of first
    # checking ``hits.total.value``: an empty list already produces an empty
    # result, and the extra lookup fails when ``hits.total`` is reported as a
    # plain integer or is omitted from the response.
    results = []
    for hit in res["hits"]["hits"]:
        source = hit["_source"]
        results.append({
            "company": source.get(
                "short_company_name", "Название компании не указано"
            ),
            "files_path": source.get("folder_path", "Путь к файлам не найден"),
            "highlights": hit.get("highlight", {}),
            "score": hit["_score"],  # relevance score assigned by Elasticsearch
        })
    return results
def find_nmd_docs(user_query, maps):
    """Collect the names mapped to the companies matching ``user_query``.

    Runs ``search_companies`` (up to 100 hits) and, for every hit whose
    ``"files_path"`` is a key of ``maps``, gathers the names stored under
    that key, keeping the order of the hits.

    Args:
        user_query: Text forwarded to ``search_companies``.
        maps: Mapping from a ``files_path`` value to an iterable of names.

    Returns:
        A list of the gathered names, each with its last five characters
        removed.
    """
    collected = []
    for company_hit in search_companies(user_query, size=100):
        folder = company_hit["files_path"]
        if folder not in maps:
            continue
        collected += maps[folder]

    # NOTE(review): the slice presumably drops a five-character suffix such
    # as a file extension; confirm every name in `maps` ends with one.
    return [entry[:-5] for entry in collected]