fschwartzer committed (verified)
Commit c8e42cd · 1 Parent(s): 4f9416a

Update app.py

Files changed (1):
  app.py  +52 -52
app.py CHANGED
@@ -23,11 +23,11 @@ html_content = f"""
  <div style='width: 20px; height: 40px; background-color: green; margin-right: 1px;'></div>
  <div style='width: 20px; height: 40px; background-color: red; margin-right: 1px;'></div>
  <div style='width: 20px; height: 40px; background-color: yellow; margin-right: 20px;'></div>
- <span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>PROTAX</strong></span>
+ <span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>NOSTRADAMUS</strong></span>
  </div>
  <div style='text-align: left; width: 100%;'>
  <span style='font-size: 20px; font-weight: normal; color: #333; font-family: "Kanit", sans-serif'>
- <strong>PRO</strong>phet & <strong>TA</strong>pex e<strong>X</strong>plorer</span>
+ Meta Prophet + Microsoft TAPEX</span>
  </div>
  </div>
  """
@@ -115,55 +115,55 @@ if uploaded_file:
     all_anomalies.sort_values(by=['monetary value'], ascending=False, inplace=True)
     all_anomalies = all_anomalies.fillna('').astype(str)
     st.session_state['all_anomalies'] = all_anomalies
-
-    # Load translation models
-    pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
-    en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
-    tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
-
-    # Load TAPEX model
-    tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
-    tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
-
-    def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
-        input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
-        outputs = model.generate(input_ids)
-        translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return translated_text
-
-    def response(user_question, table_data):
-        question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
-        encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
-        outputs = tapex_model.generate(**encoding)
-        response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-        response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
-        return response_pt
-
-    # Streamlit interface
-    st.dataframe(st.session_state['all_anomalies'].head())
-
-    # Chat history
-    if 'history' not in st.session_state:
-        st.session_state['history'] = []
-
-    user_question = st.text_input("Escreva sua questão aqui:", "")
-
-    if user_question:
-        st.session_state['history'].append(('👤', user_question))
-        st.markdown(f"**👤 {user_question}**")
-
-        bot_response = response(user_question, st.session_state['all_anomalies'])
-
-        st.session_state['history'].append(('🤖', bot_response))
-        st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)
-
-    if st.button("Limpar"):
-        st.session_state['history'] = []
-
-    for sender, message in st.session_state['history']:
-        if sender == '👤':
-            st.markdown(f"**👤 {message}**")
-        elif sender == '🤖':
-            st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
 else:
     st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")
+
+# Load translation models
+pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
+en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
+tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
+
+# Load TAPEX model
+tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
+tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
+
+def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
+    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
+    outputs = model.generate(input_ids)
+    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return translated_text
+
+def response(user_question, table_data):
+    question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
+    encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
+    outputs = tapex_model.generate(**encoding)
+    response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+    response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
+    return response_pt
+
+# Streamlit interface
+st.dataframe(st.session_state['all_anomalies'].head())
+
+# Chat history
+if 'history' not in st.session_state:
+    st.session_state['history'] = []
+
+user_question = st.text_input("Escreva sua questão aqui:", "")
+
+if user_question:
+    st.session_state['history'].append(('👤', user_question))
+    st.markdown(f"**👤 {user_question}**")
+
+    bot_response = response(user_question, st.session_state['all_anomalies'])
+
+    st.session_state['history'].append(('🤖', bot_response))
+    st.markdown(f"<div style='text-align: right'>**🤖 {bot_response}**</div>", unsafe_allow_html=True)
+
+if st.button("Limpar"):
+    st.session_state['history'] = []
+
+for sender, message in st.session_state['history']:
+    if sender == '👤':
+        st.markdown(f"**👤 {message}**")
+    elif sender == '🤖':
+        st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)
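For context on the relocated block: it round-trips the user's Portuguese question through English so that TAPEX, an English table question-answering model, can answer it against the anomalies table, then translates the answer back. Below is a standalone sketch of that flow using the same checkpoints the diff references; the toy DataFrame and the sample question are illustrative only and not taken from the app.

```python
import pandas as pd
from transformers import (BartForConditionalGeneration, T5ForConditionalGeneration,
                          T5Tokenizer, TapexTokenizer)

# Same checkpoints as in app.py
pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")

def translate(text, model, tokenizer):
    # T5 seq2seq translation: encode, generate, decode
    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Illustrative table; the app passes st.session_state['all_anomalies'] here,
# already converted to strings with .astype(str)
table = pd.DataFrame({"grupo": ["A", "B"], "monetary value": ["1200", "300"]})

question_pt = "Qual grupo tem o maior monetary value?"
question_en = translate(question_pt, pt_en_translator, tokenizer)
encoding = tapex_tokenizer(table=table, query=[question_en], padding=True,
                           truncation=True, return_tensors="pt")
answer_en = tapex_tokenizer.batch_decode(tapex_model.generate(**encoding),
                                         skip_special_tokens=True)[0]
print(translate(answer_en, en_pt_translator, tokenizer))
```

Note that app.py repeats these from_pretrained calls on every Streamlit rerun; wrapping the loading in a function decorated with st.cache_resource is a common way to avoid reloading, though this commit does not do that.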
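A related detail: because the relocated block now follows the upload branch, st.session_state['all_anomalies'] may not have been set yet on a fresh session, in which case the st.dataframe lookup fails. A hypothetical guard, not part of this commit, that defers the chat UI until the table exists:

```python
import streamlit as st

# Hypothetical guard (not in the commit): only build the table preview and
# the chat widgets once the upload branch has stored the anomalies table.
if 'all_anomalies' in st.session_state:
    st.dataframe(st.session_state['all_anomalies'].head())
    user_question = st.text_input("Escreva sua questão aqui:", "")
    # ... translate -> TAPEX -> translate back, as in app.py ...
else:
    st.info("Carregue um arquivo CSV ou XLSX para habilitar o chat.")
```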