# muryshev's picture
# init
# b24d496
from elasticsearch import Elasticsearch
import os
# Index queried by the search helpers in this module; empty string when the
# ES_INDEX_NAME environment variable is not set.
es_index_name = os.environ.get("ES_INDEX_NAME", "")

# Module-wide Elasticsearch client. It is only constructed when ES_URL is set
# to a non-empty value; otherwise it stays None.
es = Elasticsearch(os.environ.get("ES_URL", "")) if os.environ.get("ES_URL", "") else None
def _build_company_search_body(query, size):
    """Assemble the Elasticsearch request body used by ``search_companies``.

    The body runs a ``multi_match`` on ``short_company_name`` wrapped in a
    ``function_score`` query, and asks for highlighting of the matched name.

    Args:
        query: Text to match against ``short_company_name``.
        size: Value sent as the request's ``size`` (maximum number of hits).

    Returns:
        dict: The request body to pass to ``es.search``.
    """
    return {
        # Hits whose final score is below this threshold are not returned.
        "min_score": 3.5,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": ["short_company_name"],
                        "analyzer": "custom_russian_analyzer"
                    }
                },
                # Each entry applies its weight to documents whose name matches
                # one of the listed terms. Weights below 1 reduce the score and
                # weights above 1 raise it (see "boost_mode" below).
                # NOTE(review): "score_mode" is not set, so the Elasticsearch
                # default decides how several matching entries are combined.
                "functions": [
                    {
                        "filter": {
                            "bool": {
                                "should": [
                                    {"match": {"short_company_name": "норильский"}},
                                    {"match": {"short_company_name": "норникель"}},
                                    {"match": {"short_company_name": "Норникель"}},
                                    {"match": {"short_company_name": "нн"}},
                                    {"match": {"short_company_name": "никель"}}
                                ]
                            }
                        },
                        "weight": 0.38
                    },
                    {
                        "filter": {
                            "bool": {
                                "should": [
                                    # {"match": {"short_company_name": "кольская"}},
                                    {"match": {"short_company_name": "гмк"}}
                                ]
                            }
                        },
                        "weight": 0.4
                    },
                    {
                        "filter": {
                            "bool": {
                                "should": [
                                    # {"match": {"short_company_name": "комбинат"}},
                                    {"match": {"short_company_name": "транспорт"}},
                                    {"match": {"short_company_name": "спутник"}},
                                    {"match": {"short_company_name": "сфера"}},
                                    {"match": {"short_company_name": "сервисы"}},
                                    {"match": {"short_company_name": "авиа"}},
                                    {"match": {"short_company_name": "аэропорт"}}
                                ]
                            }
                        },
                        "weight": 2.5
                    },
                    {
                        "filter": {
                            "bool": {
                                "should": [
                                    # {"match": {"short_company_name": "коропоративный"}},
                                    # {"match": {"short_company_name": "университет"}},
                                    {"match": {"short_company_name": "пао"}}
                                ]
                            }
                        },
                        "weight": 1.45
                    }
                ],
                # The function result is multiplied into the query score.
                "boost_mode": "multiply"
            }
        },
        "size": size,
        "highlight": {
            "fields": {
                "short_company_name": {}
            },
            "pre_tags": ["<b>"],
            "post_tags": ["</b>"],
            "fragment_size": 150
        }
    }


def _company_hit_to_result(hit):
    """Convert one raw search hit into the dict returned by ``search_companies``."""
    source = hit["_source"]
    return {
        "company": source.get("short_company_name", "Название компании не указано"),
        "files_path": source.get("folder_path", "Путь к файлам не найден"),
        "highlights": hit.get("highlight", {}),
        "score": hit["_score"],  # relevance score reported for the hit
    }


def search_companies(query, size=100):
    """Search the company index by short company name.

    Args:
        query: Text to match against ``short_company_name``.
        size: Maximum number of hits to request. Defaults to 100.

    Returns:
        list[dict]: One dict per hit with the keys ``company``, ``files_path``,
        ``highlights`` and ``score``. Empty when the module-level client ``es``
        is ``None`` or when the search returns no hits.

    Raises:
        KeyError: If the response lacks ``hits.hits``, or a hit lacks
            ``_source`` or ``_score``.
    """
    if es is None:
        return []

    # Run the search.
    res = es.search(index=es_index_name, body=_build_company_search_body(query, size))

    # Process the results. The returned hits are iterated directly instead of
    # first checking hits.total.value: an empty hit list already produces [],
    # and this avoids failing on responses where "total" is a plain integer
    # or is absent.
    return [_company_hit_to_result(hit) for hit in res["hits"]["hits"]]
def find_nmd_docs(user_query, maps):
    """Collect document names for the companies matching ``user_query``.

    Runs ``search_companies`` for up to 100 hits and, for every hit whose
    ``files_path`` is a key of ``maps``, takes all names stored under that key.

    Args:
        user_query: Search text forwarded to ``search_companies``.
        maps: Mapping from a files path to an iterable of names.

    Returns:
        list: Every collected name with its last five characters removed, in
        hit order. Names are not de-duplicated.
    """
    # NOTE(review): the [:-5] slice presumably strips a five-character file
    # extension such as ".docx" -- confirm against the contents of `maps`.
    return [
        doc_name[:-5]
        for hit in search_companies(user_query, size=100)
        if hit["files_path"] in maps
        for doc_name in maps[hit["files_path"]]
    ]