Spaces:
Runtime error
Runtime error
File size: 5,322 Bytes
82bc3c8 c836c2f 82bc3c8 c836c2f 82bc3c8 c836c2f 82bc3c8 d57284e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import logging
import os
from functools import lru_cache

import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from flask import Flask, request, jsonify
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer
# Create the Flask application instance (no app factory is used here)
app = Flask(__name__)
# Configure logging
#logging.basicConfig(filename='app.log', level=logging.INFO)
"""
Functions for request/response validation
"""
# Define a function for request validation
def validate_request(request_data):
    """Return True if *request_data* is a dict containing a 'question' key.

    Tolerates None / non-dict input (e.g. ``request.get_json()`` returns
    None for a non-JSON body) and returns False instead of raising, which
    lets the endpoint respond with a clean 400.
    """
    if not isinstance(request_data, dict):
        return False
    return 'question' in request_data
# Define a function for response validation
def validate_response(response_data):
    """Return True if *response_data* is a dict containing a 'message' key.

    Tolerates None / non-dict input and returns False instead of raising,
    mirroring validate_request.
    """
    if not isinstance(response_data, dict):
        return False
    return 'message' in response_data
"""
Function for preparing csv for indexing
"""
def prepare_documents(df):
    """Convert a DataFrame into a list of Elasticsearch document dicts.

    Expects columns 'Passages', 'Metadata' and 'Embedding', where
    'Embedding' holds a numpy array per row.

    The dense-vector 'dims' is now taken from each embedding's actual
    length instead of the previous hard-coded 3, which is very unlikely
    to match the real sentence-transformer embedding size.
    """
    documents = []
    for _, row in df.iterrows():
        embedding = row["Embedding"]
        documents.append({
            "Passages": row["Passages"],
            "Metadata": row["Metadata"],
            "Embedding": {
                "type": "dense_vector",
                "dims": len(embedding),  # actual vector dimensionality
                "value": embedding.tolist(),
            },
        })
    return documents
"""
function for working with retrival responses
"""
# Extract relevant passages, metadata, and scores
def Extraction(response, question_embedding, top_k=3):
    """Re-rank Elasticsearch hits by cosine similarity to the question.

    Parameters
    ----------
    response : dict
        Raw Elasticsearch search response (``hits.hits`` structure, each
        hit's ``_source`` carrying 'Passages', 'Metadata' and an
        'Embedding' dict with a 'value' vector).
    question_embedding : sequence of float
        Embedding of the user's question.
    top_k : int, optional
        Number of passages to return. Defaults to 3, preserving the
        previously hard-coded behavior.

    Returns
    -------
    list of dict
        Up to *top_k* dicts with 'passage', 'metadata' and 'score',
        sorted by descending similarity.
    """
    relevant_passages = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        passage_embedding = np.array(source["Embedding"]['value'])
        # scipy's cosine() is a distance; 1 - distance is the similarity
        score = 1 - cosine(question_embedding, passage_embedding)
        relevant_passages.append({
            "passage": source["Passages"],
            "metadata": source["Metadata"],
            "score": score,
        })
    # Highest-similarity passages first; keep only the best top_k
    relevant_passages.sort(key=lambda p: p["score"], reverse=True)
    return relevant_passages[:top_k]
# Create the Elasticsearch client instance.
# SECURITY: the endpoint URL and API key were committed to source control.
# They are now read from the environment; the inline defaults remain only
# for backward compatibility — the exposed key should be rotated and the
# defaults removed.
es = Elasticsearch(
    os.environ.get(
        "ES_URL",
        "https://92d997736474439dae5ccfaedc2ad990.us-central1.gcp.cloud.es.io:443",
    ),
    api_key=os.environ.get(
        "ES_API_KEY",
        "Ym16RzI0b0JIcXpRTU9NQUNUNE46YnBmaUtCWHdTNXlnN1dZR2w4Rllqdw==",
    ),
)
#app.logger.info(msg='es instance created')
"""
Question asking endpoint
"""
# Define an endpoint for receiving a user question via POST request
# Cache the sentence-embedding model so it is loaded once per process,
# not re-initialized on every request (the original reloaded it per call).
@lru_cache(maxsize=1)
def _get_model():
    """Load (once) and return the sentence-transformer embedding model."""
    return SentenceTransformer('sentence-transformers/multi-qa-distilbert-cos-v1')


# Define an endpoint for receiving a user question via POST request
@app.route('/ask', methods=['POST'])
def receive_question():
    """Handle POST /ask.

    Embeds the user's question, runs a keyword search against the
    Elasticsearch index, re-ranks the hits by embedding cosine similarity
    via Extraction(), and returns the top-3 passages with metadata and
    scores as JSON. Returns 400 for an invalid/non-JSON payload and 500
    if the outgoing payload fails validation.
    """
    # silent=True yields None (instead of raising) on a non-JSON body
    question_data = request.get_json(silent=True)
    # Validate BEFORE reading fields — the original read 'question' first
    # and would crash on a missing/non-JSON body.
    if not validate_request(question_data or {}):
        #app.logger.error(msg='Invalid request data')
        return jsonify({'error': 'Invalid request data'}), 400
    user_question = question_data['question']
    question_embedding = _get_model().encode(user_question).tolist()
    #index name created on elasticsearch
    index_name = "search-passagemetadataemb"
    # Keyword search; the hits are re-ranked by embedding similarity below
    search_response = es.search(
        index=index_name,
        q=user_question,
        size=3
    )
    top_3 = Extraction(response=search_response, question_embedding=question_embedding)
    results = {}
    # enumerate() replaces the manual counter that shadowed builtin 'id'
    for passage_id, passage_info in enumerate(top_3):
        results[f"Passage {passage_id}:"] = passage_info["passage"]
        results[f"Metadata {passage_id}:"] = passage_info["metadata"]
        results[f"Score {passage_id}:"] = passage_info["score"]
    # Respond with a confirmation message ('question' fixes the original
    # 'qustion' typo in the response key)
    payload = {
        'message': 'Question received successfully',
        'question': user_question,
        'results': results,
    }
    # Validate response data
    if not validate_response(payload):
        return jsonify({'error': 'Invalid response data'}), 500
    return jsonify(payload)
"""
File Upload endpoint
"""
@app.route('/upload_csv', methods=['POST'])
def upload_document():
    """Handle POST /upload_csv.

    Saves the uploaded CSV, parses each row's stored embedding string back
    into a numpy float array, converts the rows to documents via
    prepare_documents(), and indexes them in Elasticsearch.

    Expects a multipart form field named 'file' whose CSV has 'Passages',
    'Metadata' and 'Embedding' columns, 'Embedding' being the printed form
    of a numpy array, e.g. "[0.1 0.2 ...]".
    """
    # .get() avoids werkzeug's automatic 400 KeyError when the field is
    # absent, so the 'No file uploaded' branch below is actually reachable.
    uploaded_file = request.files.get('file')
    if uploaded_file and uploaded_file.filename:
        app.logger.info(msg='file uploaded')
        # basename() strips directory components from the client-supplied
        # filename (path-traversal protection); ensure the target dir exists.
        os.makedirs('uploads', exist_ok=True)
        file_path = os.path.join('uploads', os.path.basename(uploaded_file.filename))
        uploaded_file.save(file_path)
        df = pd.read_csv(file_path)
        # Parse "[v1 v2 ...]" strings back into float arrays.
        # np.fromstring's text mode is deprecated; split + np.array replaces it.
        df['Embedding'] = df['Embedding'].apply(
            lambda s: np.array(s.replace('\n', ' ').strip('[] ').split(), dtype=float)
        )
        # Prepare and index the documents in Elasticsearch
        documents = prepare_documents(df)
        index_name = "search-passagemetadataemb"  # index name created on elasticsearch
        for doc_id, document in enumerate(documents):
            es.index(index=index_name, body=document, id=doc_id)
        return jsonify({'message': 'Document uploaded and indexed successfully'})
    return jsonify({'message': 'No file uploaded'})