Spaces:

fschwartzer
/

streamlit_chatbot

Running

App Files Files Community

fschwartzer committed on Sep 11, 2024

Commit

caea1f5

verified ·

1 Parent(s): bafee93

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -8

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import pandas as pd
 import torch
 from transformers import pipeline
 import datetime
 # Load the CSV file
 df = pd.read_csv("anomalies.csv", quotechar='"')
@@ -13,25 +14,39 @@ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
 # Fill NaN values and convert all columns to strings
 df = df.fillna('').astype(str)
-# Subset the DataFrame for the relevant query
def subset_dataframe(df, date, group_keyword):
    """Return the rows of ``df`` for one date whose group contains a keyword.

    Args:
        df: DataFrame with a 'ds' column and a string 'Group' column.
        date: Value compared for equality against the 'ds' column.
        group_keyword: Text searched for, case-insensitively, inside 'Group'.

    Returns:
        The rows of ``df`` satisfying both conditions, in their original order.
    """
    date_matches = df['ds'] == date
    # regex=False: the keyword is literal text, so characters such as "(" or
    # "." neither raise a regex error nor match more than intended.
    # na=False: a missing group counts as "no match" instead of producing a
    # non-boolean mask that cannot be used for row selection.
    group_matches = df['Group'].str.contains(
        group_keyword, case=False, regex=False, na=False
    )
    return df[date_matches & group_matches]
 # Function to generate a response using the TAPAS model
 def response(user_question, df):
     a = datetime.datetime.now()
-    # Subset the DataFrame for December 2022 and IPVA
-    subset_df = subset_dataframe(df, "2022-12-01", "IPVA")
     # Initialize the TAPAS model
     tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})
     # Debugging information
-    print("Subset DataFrame shape:", subset_df.shape)
-    print("Subset DataFrame head:\n", subset_df.head())
     print("User question:", user_question)
     # Query the TAPAS model

 import torch
 from transformers import pipeline
 import datetime
+from rapidfuzz import process, fuzz
 # Load the CSV file
 df = pd.read_csv("anomalies.csv", quotechar='"')
 # Fill NaN values and convert all columns to strings
 df = df.fillna('').astype(str)
+# Function to filter the DataFrame using RapidFuzz for dates
def filter_dataframe_by_date(df, date_str, threshold=80):
    """Return the rows of ``df`` whose 'ds' value fuzzily matches ``date_str``.

    Args:
        df: DataFrame with a 'ds' column of strings.
        date_str: Text to compare against each 'ds' value.
        threshold: Minimum ``fuzz.token_sort_ratio`` score for a row to be kept.

    Returns:
        The matching rows, ordered as RapidFuzz returns them (best score
        first). Empty when nothing reaches ``threshold``.

    NOTE(review): the comparison is purely textual, so a query such as
    "December 2022" presumably scores low against ISO dates like
    "2022-12-01" - confirm callers pass the date in the column's format.
    """
    # Hand RapidFuzz a plain list so the key in each result is the row's
    # position. With a Series the key is the index label, which is not a valid
    # argument for ``iloc`` once the index is no longer 0..n-1 (for example on
    # an already-filtered frame).
    candidates = df['ds'].tolist()
    matches = process.extract(
        date_str,
        candidates,
        scorer=fuzz.token_sort_ratio,
        limit=None,
        score_cutoff=threshold,  # drops results scoring below the threshold
    )
    positions = [position for _choice, _score, position in matches]
    return df.iloc[positions]
+# Function to filter the DataFrame using RapidFuzz for groups
def filter_dataframe_by_group(df, group_keyword, threshold=80):
    """Return the rows of ``df`` whose 'Group' value fuzzily matches a keyword.

    Args:
        df: DataFrame with a 'Group' column of strings.
        group_keyword: Text to compare against each 'Group' value.
        threshold: Minimum ``fuzz.token_sort_ratio`` score for a row to be kept.

    Returns:
        The matching rows, ordered as RapidFuzz returns them (best score
        first). Empty when nothing reaches ``threshold``.
    """
    # Hand RapidFuzz a plain list so the key in each result is the row's
    # position. With a Series the key is the index label, which breaks
    # ``iloc`` when ``df`` is itself the output of an earlier filter and its
    # index is no longer 0..n-1.
    candidates = df['Group'].tolist()
    matches = process.extract(
        group_keyword,
        candidates,
        scorer=fuzz.token_sort_ratio,
        limit=None,
        score_cutoff=threshold,  # drops results scoring below the threshold
    )
    positions = [position for _choice, _score, position in matches]
    return df.iloc[positions]
 # Function to generate a response using the TAPAS model
 def response(user_question, df):
     a = datetime.datetime.now()
+    # Extract date and group keywords from the user question
+    date_str = "December 2022"  # Example; you'd extract this from the user question
+    group_keyword = "IPVA"
+    # Filter the DataFrame by date and group
+    subset_df = filter_dataframe_by_date(df, date_str)
+    subset_df = filter_dataframe_by_group(subset_df, group_keyword)
     # Initialize the TAPAS model
     tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})
     # Debugging information
+    print("Filtered DataFrame shape:", subset_df.shape)
+    print("Filtered DataFrame head:\n", subset_df.head())
     print("User question:", user_question)
     # Query the TAPAS model