muryshev committed on
Commit
53ef13d
·
1 Parent(s): 1b15e0e
huggingface/dataset_utils.py CHANGED
@@ -24,5 +24,5 @@ def get_global_data_path():
24
  if not hf_token or not hf_dataset:
25
  return default_path
26
 
27
- folder = huggingface_hub.snapshot_download(repo_id=hf_dataset, repo_type="dataset", token=hf_token)
28
- return folder+"/legal_info_search_data/"
 
24
  if not hf_token or not hf_dataset:
25
  return default_path
26
 
27
+ folder = huggingface_hub.snapshot_download(repo_id=hf_dataset, repo_type="dataset", token=hf_token, cache_dir="/data")
28
+ return folder
semantic_search.py CHANGED
@@ -29,7 +29,7 @@ hf_dataset = os.environ.get("HF_DATASET", None)
29
  hf_model_name = os.environ.get("HF_MODEL_NAME", "")
30
 
31
  if hf_token is not None and hf_dataset is not None:
32
- global_data_path = dataset_utils.get_global_data_path()
33
  print(f"Global data path: {global_data_path}")
34
 
35
 
 
29
  hf_model_name = os.environ.get("HF_MODEL_NAME", "")
30
 
31
  if hf_token is not None and hf_dataset is not None:
32
+ global_data_path = dataset_utils.get_global_data_path()+global_data_path
33
  print(f"Global data path: {global_data_path}")
34
 
35
 
transaction_maps_search.py CHANGED
@@ -5,9 +5,7 @@ from business_transaction_map.components.embedding_extraction import EmbeddingEx
5
  import os
6
  from prompts import BUSINESS_TRANSACTION_PROMPT
7
  from llm.common import LlmApi
8
-
9
-
10
- db_files_path = os.environ.get("GLOBAL_TRANSACTION_MAPS_DATA_PATH", "transaction_maps_search_data/csv/карта_проводок_new.pkl")
11
 
12
  model_path = os.environ.get("GLOBAL_TRANSACTION_MAPS_MODEL_PATH", "")
13
 
@@ -17,6 +15,14 @@ class TransactionMapsSearch:
17
  model_name_or_path: str = model_path,
18
  device: str = DEVICE):
19
 
 
 
 
 
 
 
 
 
20
  self.device = device
21
  self.model = self.load_model(
22
  model_name_or_path=model_name_or_path,
 
5
  import os
6
  from prompts import BUSINESS_TRANSACTION_PROMPT
7
  from llm.common import LlmApi
8
+ from huggingface import dataset_utils
 
 
9
 
10
  model_path = os.environ.get("GLOBAL_TRANSACTION_MAPS_MODEL_PATH", "")
11
 
 
15
  model_name_or_path: str = model_path,
16
  device: str = DEVICE):
17
 
18
+ hf_token = os.environ.get("HF_TOKEN", None)
19
+ hf_dataset = os.environ.get("HF_DATASET", None)
20
+ db_files_path = os.environ.get("GLOBAL_TRANSACTION_MAPS_DATA_PATH", "transaction_maps_search_data/csv/карта_проводок_new.pkl")
21
+
22
+ if hf_token is not None and hf_dataset is not None:
23
+ db_files_path = dataset_utils.get_global_data_path()+db_files_path
24
+ print(f"Transaction maps data path: {db_files_path}")
25
+
26
  self.device = device
27
  self.model = self.load_model(
28
  model_name_or_path=model_name_or_path,