fschwartzer committed on
Commit
bafee93
·
verified ·
1 Parent(s): de6d203

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -4,7 +4,7 @@ import torch
4
  from transformers import pipeline
5
  import datetime
6
 
7
- # Load the CSV file and ensure proper formatting
8
  df = pd.read_csv("anomalies.csv", quotechar='"')
9
 
10
  # Convert 'real' column to standard float format and then to strings
@@ -13,25 +13,30 @@ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
13
  # Fill NaN values and convert all columns to strings
14
  df = df.fillna('').astype(str)
15
 
16
- # Truncate long strings in 'Group' column if necessary
17
- df['Group'] = df['Group'].str.slice(0, 255)
 
 
18
 
19
  # Function to generate a response using the TAPAS model
20
  def response(user_question, df):
21
  a = datetime.datetime.now()
22
 
 
 
 
23
  # Initialize the TAPAS model
24
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
25
  tokenizer_kwargs={"clean_up_tokenization_spaces": False})
26
 
27
  # Debugging information
28
- print("DataFrame shape:", df.shape)
29
- print("DataFrame head:\n", df.head())
30
  print("User question:", user_question)
31
 
32
  # Query the TAPAS model
33
  try:
34
- answer = tqa(table=df, query=user_question)['answer']
35
  except IndexError as e:
36
  print(f"Error: {e}")
37
  answer = "Error occurred: " + str(e)
 
4
  from transformers import pipeline
5
  import datetime
6
 
7
+ # Load the CSV file
8
  df = pd.read_csv("anomalies.csv", quotechar='"')
9
 
10
  # Convert 'real' column to standard float format and then to strings
 
13
  # Fill NaN values and convert all columns to strings
14
  df = df.fillna('').astype(str)
15
 
16
+ # Subset the DataFrame for the relevant query
17
def subset_dataframe(df, date, group_keyword):
    """Return the rows of *df* for one date whose group mentions a keyword.

    Args:
        df: DataFrame with a ``ds`` column and a string ``Group`` column.
        date: Value compared for equality against the ``ds`` column.
        group_keyword: Text searched for inside ``Group``, ignoring case.
            It is matched literally, so characters such as ``.`` or ``(``
            carry no regular-expression meaning.

    Returns:
        A DataFrame holding only the matching rows (possibly empty).
    """
    date_matches = df['ds'] == date
    # regex=False keeps the keyword a plain substring (no re.error on "(" or
    # over-matching on "."); na=False makes missing groups simply not match.
    group_matches = df['Group'].str.contains(
        group_keyword, case=False, regex=False, na=False
    )
    return df[date_matches & group_matches]
20
 
21
  # Function to generate a response using the TAPAS model
22
  def response(user_question, df):
23
  a = datetime.datetime.now()
24
 
25
+ # Subset the DataFrame for December 2022 and IPVA
26
+ subset_df = subset_dataframe(df, "2022-12-01", "IPVA")
27
+
28
  # Initialize the TAPAS model
29
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
30
  tokenizer_kwargs={"clean_up_tokenization_spaces": False})
31
 
32
  # Debugging information
33
+ print("Subset DataFrame shape:", subset_df.shape)
34
+ print("Subset DataFrame head:\n", subset_df.head())
35
  print("User question:", user_question)
36
 
37
  # Query the TAPAS model
38
  try:
39
+ answer = tqa(table=subset_df, query=user_question)['answer']
40
  except IndexError as e:
41
  print(f"Error: {e}")
42
  answer = "Error occurred: " + str(e)