# nn-search-full
#
# Sleeping
#
# File size: 4,880 Bytes
#
# b24d496

from elasticsearch import Elasticsearch
import os

# Name of the index to query; an empty string when ES_INDEX_NAME is not set.
es_index_name = os.getenv("ES_INDEX_NAME", "")

# Shared client, created only when ES_URL holds a non-empty value. Otherwise
# it stays None and search_companies() returns an empty list.
es = Elasticsearch(os.environ["ES_URL"]) if os.environ.get("ES_URL") else None

def search_companies(query, size=100):
    """Search the company index by short name and return the scored hits.

    The query text is matched against ``short_company_name`` with the
    ``custom_russian_analyzer`` analyzer inside a ``function_score`` query.
    Each keyword group below contributes its weight when its filter matches,
    the result is combined with the base score via ``boost_mode: multiply``,
    and ``min_score`` is set to 3.5 in the request.

    Args:
        query: Text to search for.
        size: Maximum number of hits requested from Elasticsearch.

    Returns:
        A list of dicts with the keys ``company``, ``files_path``,
        ``highlights`` and ``score``. The list is empty when no client is
        configured or when the reported total hit count is not positive.
    """
    if es is None:
        return []

    field = "short_company_name"

    # (keywords, weight) pairs; each pair becomes one function_score clause
    # whose filter matches documents containing any of the keywords.
    weighted_keywords = [
        (("норильский", "норникель", "Норникель", "нн", "никель"), 0.38),
        # "кольская" used to be part of this group and is currently disabled.
        (("гмк",), 0.4),
        # "комбинат" used to be part of this group and is currently disabled.
        (
            ("транспорт", "спутник", "сфера", "сервисы", "авиа", "аэропорт"),
            2.5,
        ),
        # "коропоративный" and "университет" used to be part of this group
        # and are currently disabled.
        (("пао",), 1.45),
    ]
    score_functions = [
        {
            "filter": {
                "bool": {
                    "should": [{"match": {field: keyword}} for keyword in keywords]
                }
            },
            "weight": weight,
        }
        for keywords, weight in weighted_keywords
    ]

    request = {
        "min_score": 3.5,
        "query": {
            "function_score": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": [field],
                        "analyzer": "custom_russian_analyzer",
                    }
                },
                "functions": score_functions,
                "boost_mode": "multiply",
            }
        },
        "size": size,
        "highlight": {
            "fields": {field: {}},
            "pre_tags": ["<b>"],
            "post_tags": ["</b>"],
            "fragment_size": 150,
        },
    }

    # Run the search against the configured index.
    response = es.search(index=es_index_name, body=request)

    # Nothing to convert when Elasticsearch reports no matches.
    hits = response["hits"]
    if not hits["total"]["value"] > 0:
        return []

    # Flatten each hit into the fields callers use; the fallback strings are
    # used when the stored document lacks the corresponding field.
    return [
        {
            "company": hit["_source"].get(field, "Название компании не указано"),
            "files_path": hit["_source"].get("folder_path", "Путь к файлам не найден"),
            "highlights": hit.get("highlight", {}),
            "score": hit["_score"],  # relevance score reported for the hit
        }
        for hit in hits["hits"]
    ]

def find_nmd_docs(user_query, maps):
    """Return the document names mapped to companies matching *user_query*.

    Runs ``search_companies`` for up to 100 hits and, for every hit whose
    ``files_path`` is a key of *maps*, collects the names stored under that
    key. Hits without an entry in *maps* are skipped.

    Args:
        user_query: Text passed to ``search_companies``.
        maps: Mapping from a folder path to an iterable of names.

    Returns:
        The collected names in hit order, each with its last five characters
        removed (presumably a file extension -- TODO confirm with the code
        that builds *maps*).
    """
    doc_names = []
    for hit in search_companies(user_query, size=100):
        folder = hit["files_path"]
        if folder not in maps:
            continue
        doc_names.extend(name[:-5] for name in maps[folder])
    return doc_names