Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- .gitattributes +1 -0
- embeded_data.json +3 -0
- get_similar_hadiths.py +55 -0
- sahih.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
embeded_data.json filter=lfs diff=lfs merge=lfs -text
|
embeded_data.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad8208fd5ff051c60c23288fc096d1ee5449951d679b6b103edf8efd07dd7721
|
3 |
+
size 95939294
|
get_similar_hadiths.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
from openai import OpenAI
|
5 |
+
|
6 |
+
class HadithSearch:
    """Rank hadiths by embedding similarity to a user's free-text input.

    The table of hadiths is loaded with :meth:`load_data_from_json` and must
    provide an ``embeddings`` column plus the ``book``, ``chapter`` and
    ``content`` columns read by :meth:`_format_results`.
    """

    def __init__(self, api_key):
        """Create the OpenAI client; no data is loaded yet.

        Args:
            api_key: Key forwarded to the ``OpenAI`` client constructor.
        """
        self.client = OpenAI(api_key=api_key)
        self.data = None

    def _cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors *a* and *b*.

        Returns ``0.0`` when either vector has zero norm, instead of
        dividing by zero and propagating ``nan`` into the ranking.
        """
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def _get_embedding(self, text, model="text-embedding-ada-002"):
        """Return the embedding of *text* after adding the fixed French prompt.

        Args:
            text: User input; newlines are replaced by spaces.
            model: Name of the embedding model passed to the API.

        Raises:
            TypeError: If *text* is not a string.
        """
        # Fail early and clearly: the previous blanket try/except hid the
        # problem until the string concatenation below raised anyway.
        if not isinstance(text, str):
            raise TypeError(
                f"text must be a str, got {type(text).__name__}"
            )
        text = text.replace("\n", " ")
        text = "Ceci est le ressenti d'un musulman et nous cherchons les hadiths qui peuvent l'aider: " + text
        return self.client.embeddings.create(input=text, model=model).data[0].embedding

    def load_data_from_json(self, json_file):
        """Load the hadith table from *json_file* into ``self.data``."""
        self.data = pd.read_json(json_file)

    @staticmethod
    def _unwrap_embedding(value):
        """Return the raw vector from a ``{"embeding": [...]}`` wrapper.

        Values that are not dicts are returned unchanged, which makes the
        unwrapping safe to repeat on every search.
        """
        if isinstance(value, dict):
            # "embeding" (sic) is the key spelling used by the stored data.
            return value["embeding"]
        return value

    def search_hadiths(self, user_input, num_hadiths=10):
        """Return the formatted *num_hadiths* hadiths closest to *user_input*.

        Side effects: rewrites ``self.data["embeddings"]`` with unwrapped
        vectors and stores the scores in ``self.data["similarity"]``.

        Args:
            user_input: Free text describing the user's situation.
            num_hadiths: Number of results to return; converted with
                ``int()`` and clamped to zero when negative.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Use load_data_from_json method to load data.")

        # Explicit per-row unwrapping replaces a swallowed exception that was
        # relied upon to skip this step on the second and later calls.
        self.data["embeddings"] = self.data.embeddings.apply(self._unwrap_embedding)

        embedding = self._get_embedding(user_input, model='text-embedding-ada-002')
        self.data['similarity'] = self.data.embeddings.apply(
            lambda x: self._cosine_similarity(x, embedding)
        )

        # head() with a negative count means "all but the last n rows",
        # which is never what a caller asking for -n results intends.
        limit = max(int(num_hadiths), 0)
        results = (
            self.data.sort_values('similarity', ascending=False)
            .head(limit)
            .to_dict(orient="records")
        )
        return self._format_results(results)

    def _format_results(self, results):
        """Render *results* as Markdown sections, numbered from 1.

        Args:
            results: Iterable of dicts with ``similarity``, ``book``,
                ``chapter`` and ``content`` keys.

        Returns:
            One string with all sections concatenated; empty when *results*
            is empty.
        """
        sections = []
        for idx, result in enumerate(results, start=1):
            similarity_percentage = str(round(result["similarity"] * 100, 2)) + "%"
            book = result["book"]
            chapter = result["chapter"]
            content = result["content"]
            sections.append(
                f"## Hadith numéro {idx}: Pertinence par rapport à votre situation : {similarity_percentage}\n"
                f"## Livre : {book}\n"
                f"## Chapitre : {chapter}\n{content}\n\n------\n\n"
            )
        return "".join(sections)
|
sahih.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|