"""Company-name search in Elasticsearch and lookup of the related documents."""

import os

from elasticsearch import Elasticsearch

es_index_name = os.environ.get("ES_INDEX_NAME", "")

# The client is created only when ES_URL is set. Without it `es` stays None
# and search_companies() returns an empty list instead of querying.
_es_url = os.environ.get("ES_URL", "")
es = Elasticsearch(_es_url) if _es_url else None

# Field that is both searched and highlighted.
_NAME_FIELD = "short_company_name"

# (terms, weight) pairs. A document whose name matches any term of a group
# gets that group's weight applied by the function_score query.
_NAME_BOOSTS = (
    (("норильский", "норникель", "Норникель", "нн", "никель"), 0.38),
    # Disabled term: "кольская"
    (("гмк",), 0.4),
    # Disabled term: "комбинат"
    (("транспорт", "спутник", "сфера", "сервисы", "авиа", "аэропорт"), 2.5),
    # Disabled terms: "коропоративный", "университет"
    (("пао",), 1.45),
)


def _weight_function(terms, weight):
    """Build one function_score entry weighting documents that match any term."""
    return {
        "filter": {
            "bool": {
                "should": [{"match": {_NAME_FIELD: term}} for term in terms]
            }
        },
        "weight": weight,
    }


def _build_search_body(query, size):
    """Build the search request body for `query`, limited to `size` hits."""
    return {
        "min_score": 3.5,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": [_NAME_FIELD],
                        "analyzer": "custom_russian_analyzer",
                    }
                },
                "functions": [
                    _weight_function(terms, weight)
                    for terms, weight in _NAME_BOOSTS
                ],
                "boost_mode": "multiply",
            }
        },
        "size": size,
        "highlight": {
            "fields": {_NAME_FIELD: {}},
            # Empty tags: highlighted fragments come back without markup.
            "pre_tags": [""],
            "post_tags": [""],
            "fragment_size": 150,
        },
    }


def _hit_to_result(hit):
    """Convert one raw search hit into the result dict returned to callers."""
    source = hit["_source"]
    return {
        "company": source.get(_NAME_FIELD, "Название компании не указано"),
        "files_path": source.get("folder_path", "Путь к файлам не найден"),
        "highlights": hit.get("highlight", {}),
        "score": hit["_score"],  # relevance score reported for the hit
    }


def search_companies(query, size=100):
    """Search the company index by short company name.

    Args:
        query: Text to match against the ``short_company_name`` field.
        size: Maximum number of hits to request.

    Returns:
        A list of dicts with the keys ``company``, ``files_path``,
        ``highlights`` and ``score``, one per returned hit. The list is
        empty when no client is configured (``ES_URL`` unset) or when
        nothing matched.
    """
    if es is None:
        return []

    res = es.search(index=es_index_name, body=_build_search_body(query, size))

    # Iterate the hits directly rather than checking hits.total first: the
    # hit list is empty when nothing matched, and this does not depend on
    # whether hits.total is reported as an object, an int, or not at all.
    return [_hit_to_result(hit) for hit in res["hits"]["hits"]]


def find_nmd_docs(user_query, maps):
    """Return the document names mapped to the companies matching a query.

    Args:
        user_query: Text passed to search_companies().
        maps: Mapping from a hit's ``files_path`` value to a list of names.

    Returns:
        The names collected for every hit whose ``files_path`` is a key of
        ``maps``, in hit order, each with its last five characters removed.
    """
    names = []
    for result in search_companies(user_query, size=100):
        files_path = result["files_path"]
        if files_path in maps:
            names.extend(maps[files_path])

    # NOTE(review): the fixed 5-character cut presumably strips a file
    # extension such as ".docx" or ".json" -- confirm against the caller.
    return [name[:-5] for name in names]