hitz02 committed on
Commit
3b87d69
·
1 Parent(s): 50d3ba1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow.compat.v1 as tf
2
+ import os
3
+ import shutil
4
+ import csv
5
+ import pandas as pd
6
+ import numpy as np
7
+ import IPython
8
+ import streamlit as st
9
+ import subprocess
10
+ from itertools import islice
11
+ import random
12
+ from transformers import TapasTokenizer, TapasForQuestionAnswering
13
# Only let TensorFlow log messages at ERROR level or above.
tf.get_logger().setLevel('ERROR')

# Load the TAPAS question-answering checkpoint and its matching tokenizer.
model_name = 'google/tapas-base-finetuned-wtq'
model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
tokenizer = TapasTokenizer.from_pretrained(model_name)

st.set_option('deprecation.showfileUploaderEncoding', False)
st.title('Query your Table')
st.header('Upload CSV file')
uploaded_file = st.file_uploader("Choose your CSV file", type='csv')
# Reserved slot: the table is drawn here now and redrawn here (with
# highlighted cells) after prediction.
placeholder = st.empty()

# NOTE(review): the nesting below is reconstructed from the data dependencies
# (only statements that need the uploaded file are placed under the `if`);
# confirm against the original app.py.
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    # Remove every comma inside string cells (regex replace over the frame).
    data.replace(',', '', regex=True, inplace=True)
    if st.checkbox('Want to see the data?'):
        placeholder.dataframe(data)

st.header('Enter your queries')
input_queries = st.text_input('Type your queries separated by comma(,)', value='')
# Split on commas, trimming whitespace and dropping blanks: ''.split(',')
# would otherwise yield [''] and send one empty query to the model.
input_queries = [query.strip() for query in input_queries.split(',') if query.strip()]

# One random hex colour per query: colors1 tints the query heading, colors2 is
# the matching CSS rule used to highlight that query's answer cells.
colors1 = ['#' + ''.join(random.choices('0123456789ABCDEF', k=6)) for _ in input_queries]
colors2 = ['background-color:' + str(color) + '; color: black' for color in colors1]
32
def styling_specific_cell(x, tags, colors):
    """Build a cell-wise CSS frame that highlights selected cells of ``x``.

    Intended for use with ``DataFrame.style.apply(..., axis=None)``, which
    passes the whole frame as ``x`` and expects a same-shaped frame of CSS
    strings in return.

    Args:
        x: The DataFrame being styled; only its index and columns are used.
        tags: One iterable of positional ``(row, column)`` pairs per group.
        colors: CSS strings; ``colors[i]`` is applied to every cell listed in
            ``tags[i]``.

    Returns:
        A DataFrame with the same index and columns as ``x`` holding the CSS
        string for each tagged cell and ``''`` everywhere else.

    Raises:
        IndexError: If ``tags`` has more groups than ``colors`` has entries,
            or a coordinate lies outside ``x``.
    """
    df_styler = pd.DataFrame('', index=x.index, columns=x.columns)
    for idx, tag in enumerate(tags):
        for r, c in tag:
            # Positional assignment: coordinates are integer offsets, not labels.
            df_styler.iloc[r, c] = colors[idx]
    return df_styler
38
+
39
import html  # local to this section: used to escape query text rendered as HTML


def _answer_cells(table, coordinates):
    """Return the values of ``table`` at each positional ``(row, column)`` pair."""
    return [table.iat[coordinate] for coordinate in coordinates]


def _format_answer(cell_values, aggregation):
    """Return the text displayed for one query's answer.

    For ``"SUM"`` and ``"AVERAGE"`` the cells are converted to floats and
    aggregated (the average is rounded to 2 decimals). For any other
    aggregation label, for an empty selection, or when a cell is not numeric,
    the cell values are shown joined by ``", "``.
    """
    joined = ", ".join(cell_values)
    if aggregation not in ("SUM", "AVERAGE") or not cell_values:
        return joined
    try:
        numbers = [float(value) for value in cell_values]
    except ValueError:
        # Non-numeric cells cannot be aggregated; fall back to the raw cells.
        return joined
    if aggregation == "SUM":
        return str(sum(numbers))
    return str(np.round(np.mean(numbers), 2))


if st.button('Predict Answers'):
    # Ignore blank entries such as the [''] produced by splitting an empty box.
    queries = [query.strip() for query in input_queries if query.strip()]

    if uploaded_file is None:
        # `data` only exists once a CSV has been uploaded.
        st.warning('Please upload a CSV file first.')
    elif not queries:
        st.warning('Please enter at least one query.')
    else:
        with st.spinner('It will take approx a minute'):
            # Every cell is converted to str before tokenizing.
            table = data.astype(str)
            inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")
            outputs = model(**inputs)
            predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
                inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
            )

            id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
            aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]

            # One list of selected cell values per query.
            answers = [_answer_cells(table, coordinates) for coordinates in predicted_answer_coordinates]

        st.success('Done! Please check below the answers and its cells highlighted in table above')

        # Redraw the table in its reserved slot with each query's cells tinted.
        placeholder.dataframe(
            table.style.apply(
                styling_specific_cell,
                tags=predicted_answer_coordinates,
                colors=colors2,
                axis=None,
            )
        )

        for query, cell_values, predicted_agg, c in zip(queries, answers, aggregation_predictions_string, colors1):
            st.write('\n')
            # The query is user-typed text rendered with unsafe_allow_html, so
            # escape it before interpolating it into the markup.
            st.markdown(
                '<font color={} size=4>**{}**</font>'.format(c, html.escape(query, quote=False)),
                unsafe_allow_html=True,
            )
            st.write('\n')
            # NOTE(review): "COUNT" shows the selected cells rather than their
            # number, as in the original code — confirm this is intended.
            st.markdown('**>** ' + _format_answer(cell_values, predicted_agg))