Spaces:

fschwartzer
/

streamlit_chatbot

Running

fschwartzer committed on Sep 11, 2024

Commit

bafee93

verified ·

1 Parent(s): de6d203

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import torch
 from transformers import pipeline
 import datetime
-# Load the CSV file and ensure proper formatting
 df = pd.read_csv("anomalies.csv", quotechar='"')
 # Convert 'real' column to standard float format and then to strings
@@ -13,25 +13,30 @@ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
 # Fill NaN values and convert all columns to strings
 df = df.fillna('').astype(str)
-# Truncate long strings in 'Group' column if necessary
-df['Group'] = df['Group'].str.slice(0, 255)
 # Function to generate a response using the TAPAS model
 def response(user_question, df):
     a = datetime.datetime.now()
     # Initialize the TAPAS model
     tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})
     # Debugging information
-    print("DataFrame shape:", df.shape)
-    print("DataFrame head:\n", df.head())
     print("User question:", user_question)
     # Query the TAPAS model
     try:
-        answer = tqa(table=df, query=user_question)['answer']
     except IndexError as e:
         print(f"Error: {e}")
         answer = "Error occurred: " + str(e)

 from transformers import pipeline
 import datetime
+# Load the CSV file
 df = pd.read_csv("anomalies.csv", quotechar='"')
 # Convert 'real' column to standard float format and then to strings
 # Fill NaN values and convert all columns to strings
 df = df.fillna('').astype(str)
+# Subset the DataFrame for the relevant query
+def subset_dataframe(df, date, group_keyword):
+    # Return only the rows of `df` whose 'ds' value equals `date` AND whose
+    # 'Group' value contains `group_keyword`, ignoring case (case=False).
+    # NOTE(review): pandas `str.contains` treats the pattern as a regular
+    # expression by default, so regex metacharacters in `group_keyword`
+    # are not matched literally.
+    # NOTE(review): presumably `df` is the all-string frame built at module
+    # level, making the `date` comparison a plain string match - confirm
+    # against callers.
+    subset_df = df[(df['ds'] == date) & (df['Group'].str.contains(group_keyword, case=False))]
+    return subset_df
 # Function to generate a response using the TAPAS model
 def response(user_question, df):
     a = datetime.datetime.now()
+    # Subset the DataFrame for December 2022 and IPVA
+    subset_df = subset_dataframe(df, "2022-12-01", "IPVA")
     # Initialize the TAPAS model
     tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})
     # Debugging information
+    print("Subset DataFrame shape:", subset_df.shape)
+    print("Subset DataFrame head:\n", subset_df.head())
     print("User question:", user_question)
     # Query the TAPAS model
     try:
+        answer = tqa(table=subset_df, query=user_question)['answer']
     except IndexError as e:
         print(f"Error: {e}")
         answer = "Error occurred: " + str(e)