# Spaces: Sleeping (page-header residue from the hosting site; not part of the module)
from elasticsearch import Elasticsearch | |
import os | |
# Index queried by the search helpers; empty string when ES_INDEX_NAME is unset.
es_index_name = os.environ.get("ES_INDEX_NAME", "")

# Shared Elasticsearch client. It stays None when ES_URL is unset or empty,
# which the search helpers treat as "search disabled".
es = Elasticsearch(os.environ["ES_URL"]) if os.environ.get("ES_URL") else None
# Score adjustments applied on top of the text match, as (weight, keywords).
# A rule fires when `short_company_name` matches ANY of its keywords, and the
# weight is multiplied into the score (boost_mode "multiply"), so weights
# below 1 demote a hit and weights above 1 promote it.
# NOTE: "кольская", "комбинат", "коропоративный" and "университет" were
# present but commented out in the original rule set; they stay disabled.
_NAME_BOOST_RULES = (
    (0.38, ("норильский", "норникель", "Норникель", "нн", "никель")),
    (0.4, ("гмк",)),
    (2.5, ("транспорт", "спутник", "сфера", "сервисы", "авиа", "аэропорт")),
    (1.45, ("пао",)),
)


def _name_boost(keywords, weight):
    """Build one function_score entry applying *weight* on any keyword match."""
    return {
        "filter": {
            "bool": {
                "should": [
                    {"match": {"short_company_name": keyword}}
                    for keyword in keywords
                ]
            }
        },
        "weight": weight,
    }


def _build_search_body(query, size, min_score):
    """Assemble the request body for the company-name search."""
    return {
        "min_score": min_score,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": ["short_company_name"],
                        "analyzer": "custom_russian_analyzer",
                    }
                },
                "functions": [
                    _name_boost(keywords, weight)
                    for weight, keywords in _NAME_BOOST_RULES
                ],
                "boost_mode": "multiply",
            }
        },
        "size": size,
        "highlight": {
            "fields": {"short_company_name": {}},
            "pre_tags": ["<b>"],
            "post_tags": ["</b>"],
            "fragment_size": 150,
        },
    }


def search_companies(query, size=100, *, min_score=3.5):
    """Search the company index by short company name.

    Args:
        query: Free-text query matched against ``short_company_name``.
        size: Maximum number of hits requested from Elasticsearch.
        min_score: Hits scoring below this value are dropped by
            Elasticsearch. Defaults to the previously hard-coded 3.5.

    Returns:
        A list of dicts, one per hit, with the keys ``company``,
        ``files_path``, ``highlights`` and ``score``. The list is empty when
        the query is blank or ``None``, when no client is configured
        (``es`` is ``None``), or when nothing matches.
    """
    # A blank query cannot match anything and None is not a valid
    # multi_match value, so skip the round trip entirely.
    if not query or not str(query).strip():
        return []
    if es is None:
        return []

    # Run the search.
    res = es.search(
        index=es_index_name,
        body=_build_search_body(query, size, min_score),
    )

    # Convert hits to plain dicts. Iterating `hits.hits` directly already
    # covers the "no results" case and does not depend on the shape of
    # `hits.total`, which may be an object, a bare integer, or absent.
    results = []
    for hit in res["hits"]["hits"]:
        source = hit["_source"]
        results.append({
            "company": source.get(
                "short_company_name", "Название компании не указано"
            ),
            "files_path": source.get("folder_path", "Путь к файлам не найден"),
            "highlights": hit.get("highlight", {}),
            "score": hit["_score"],  # relevance score assigned by Elasticsearch
        })
    return results
def find_nmd_docs(user_query, maps):
    """Return document names attached to companies matching *user_query*.

    Runs ``search_companies`` (up to 100 hits), looks each hit's
    ``files_path`` up in *maps*, and collects the names stored there in hit
    order. Hits whose path is not a key of *maps* are skipped.

    Each collected name has its last five characters removed.
    NOTE(review): presumably this strips a five-character suffix such as a
    file extension — confirm against the data that populates *maps*.
    """
    hits = search_companies(user_query, size=100)
    matched_paths = (hit["files_path"] for hit in hits)
    return [
        doc_name[:-5]
        for path in matched_paths
        if path in maps
        for doc_name in maps[path]
    ]