Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,11 +23,11 @@ html_content = f"""
|
|
23 |
<div style='width: 20px; height: 40px; background-color: green; margin-right: 1px;'></div>
|
24 |
<div style='width: 20px; height: 40px; background-color: red; margin-right: 1px;'></div>
|
25 |
<div style='width: 20px; height: 40px; background-color: yellow; margin-right: 20px;'></div>
|
26 |
-
<span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>
|
27 |
</div>
|
28 |
<div style='text-align: left; width: 100%;'>
|
29 |
<span style='font-size: 20px; font-weight: normal; color: #333; font-family: "Kanit", sans-serif'>
|
30 |
-
|
31 |
</div>
|
32 |
</div>
|
33 |
"""
|
@@ -115,55 +115,55 @@ if uploaded_file:
|
|
115 |
all_anomalies.sort_values(by=['monetary value'], ascending=False, inplace=True)
|
116 |
all_anomalies = all_anomalies.fillna('').astype(str)
|
117 |
st.session_state['all_anomalies'] = all_anomalies
|
118 |
-
|
119 |
-
# Load translation models
|
120 |
-
pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
121 |
-
en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
|
122 |
-
tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
|
123 |
-
|
124 |
-
# Load TAPEX model
|
125 |
-
tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
|
126 |
-
tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
|
127 |
-
|
128 |
-
def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
|
129 |
-
input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
|
130 |
-
outputs = model.generate(input_ids)
|
131 |
-
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
132 |
-
return translated_text
|
133 |
-
|
134 |
-
def response(user_question, table_data):
|
135 |
-
question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
|
136 |
-
encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
|
137 |
-
outputs = tapex_model.generate(**encoding)
|
138 |
-
response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
139 |
-
response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
|
140 |
-
return response_pt
|
141 |
-
|
142 |
-
# Streamlit interface
|
143 |
-
st.dataframe(st.session_state['all_anomalies'].head())
|
144 |
-
|
145 |
-
# Chat history
|
146 |
-
if 'history' not in st.session_state:
|
147 |
-
st.session_state['history'] = []
|
148 |
-
|
149 |
-
user_question = st.text_input("Escreva sua questão aqui:", "")
|
150 |
-
|
151 |
-
if user_question:
|
152 |
-
st.session_state['history'].append(('👤', user_question))
|
153 |
-
st.markdown(f"**👤 {user_question}**")
|
154 |
-
|
155 |
-
bot_response = response(user_question, st.session_state['all_anomalies'])
|
156 |
-
|
157 |
-
st.session_state['history'].append(('🤖', bot_response))
|
158 |
-
st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)
|
159 |
-
|
160 |
-
if st.button("Limpar"):
|
161 |
-
st.session_state['history'] = []
|
162 |
-
|
163 |
-
for sender, message in st.session_state['history']:
|
164 |
-
if sender == '👤':
|
165 |
-
st.markdown(f"**👤 {message}**")
|
166 |
-
elif sender == '🤖':
|
167 |
-
st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
|
168 |
else:
|
169 |
st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
<div style='width: 20px; height: 40px; background-color: green; margin-right: 1px;'></div>
|
24 |
<div style='width: 20px; height: 40px; background-color: red; margin-right: 1px;'></div>
|
25 |
<div style='width: 20px; height: 40px; background-color: yellow; margin-right: 20px;'></div>
|
26 |
+
<span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>NOSTRADAMUS</strong></span>
|
27 |
</div>
|
28 |
<div style='text-align: left; width: 100%;'>
|
29 |
<span style='font-size: 20px; font-weight: normal; color: #333; font-family: "Kanit", sans-serif'>
|
30 |
+
Meta Prophet + Microsoft TAPEX</span>
|
31 |
</div>
|
32 |
</div>
|
33 |
"""
|
|
|
115 |
all_anomalies.sort_values(by=['monetary value'], ascending=False, inplace=True)
|
116 |
all_anomalies = all_anomalies.fillna('').astype(str)
|
117 |
st.session_state['all_anomalies'] = all_anomalies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
else:
|
119 |
st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")
|
120 |
+
|
121 |
+
# --- Translation models (Portuguese <-> English T5 checkpoints) ---
# Downloaded from the Hugging Face hub on first run; cached afterwards.
pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
# NOTE(review): a single tokenizer (pt-en checkpoint) is shared for both
# directions — presumably the two checkpoints use the same vocabulary; confirm.
tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")

# --- TAPEX table question-answering model (English only) ---
tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
|
129 |
+
|
130 |
+
def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
    """Translate ``text`` with a seq2seq model and its tokenizer.

    Encodes the input, lets ``model.generate`` produce the translation,
    and decodes the first candidate with special tokens stripped.

    NOTE(review): ``source_lang`` and ``target_lang`` are accepted for
    interface compatibility but never read — the translation direction
    is fixed by whichever model is passed in.
    """
    token_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
    generated = model.generate(token_ids)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
135 |
+
|
136 |
+
def response(user_question, table_data):
    """Answer a Portuguese question about ``table_data`` using TAPEX.

    Pipeline: translate the question pt->en (TAPEX is English-only),
    run TAPEX table QA over ``table_data``, then translate the answer
    back en->pt. Relies on the module-level models loaded above
    (``pt_en_translator``, ``en_pt_translator``, ``tokenizer``,
    ``tapex_model``, ``tapex_tokenizer``).
    """
    # Portuguese question -> English.
    question_en = translate(
        user_question, pt_en_translator, tokenizer,
        source_lang="pt", target_lang="en",
    )
    # Encode table + question; truncation keeps large tables within the
    # model's input limit.
    encoding = tapex_tokenizer(
        table=table_data, query=[question_en],
        padding=True, return_tensors="pt", truncation=True,
    )
    answer_ids = tapex_model.generate(**encoding)
    response_en = tapex_tokenizer.batch_decode(answer_ids, skip_special_tokens=True)[0]
    # English answer -> Portuguese for the user.
    return translate(
        response_en, en_pt_translator, tokenizer,
        source_lang="en", target_lang="pt",
    )
|
143 |
+
|
144 |
+
# Streamlit interface: preview of the anomalies table plus a simple chat.
st.dataframe(st.session_state['all_anomalies'].head())

# Chat history lives in session state so it survives Streamlit reruns.
if 'history' not in st.session_state:
    st.session_state['history'] = []

user_question = st.text_input("Escreva sua questão aqui:", "")

if user_question:
    # Record and echo the user's question.
    st.session_state['history'].append(('👤', user_question))
    st.markdown(f"**👤 {user_question}**")

    # Ask the TAPEX pipeline about the anomalies table.
    bot_response = response(user_question, st.session_state['all_anomalies'])

    st.session_state['history'].append(('🤖', bot_response))
    st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)

if st.button("Limpar"):
    st.session_state['history'] = []

# Replay the stored conversation.
# NOTE(review): within the same rerun the latest exchange is rendered both
# above and by this loop, so it may appear twice — confirm intended.
for author, text in st.session_state['history']:
    if author == '👤':
        st.markdown(f"**👤 {text}**")
    elif author == '🤖':
        st.markdown(f"<div style='text-align: right'>**🤖 {text}**</div>", unsafe_allow_html=True)