fschwartzer's picture
Update app.py
31810c1 verified
raw
history blame
7.98 kB
import datetime
import html

import pandas as pd
import sentencepiece as spm
import streamlit as st
from prophet import Prophet
from transformers import BartForConditionalGeneration, TapexTokenizer, T5ForConditionalGeneration, T5Tokenizer
# Path to the CSS file; adjust it to match your project layout.
css_file = "style.css"

# Read the stylesheet so it can be inlined into the page. The encoding is
# explicit so the result does not depend on the platform default.
with open(css_file, "r", encoding="utf-8") as css:
    css_style = css.read()

# Combined markup: web-font import, the inlined stylesheet and the HTML banner.
# The @import rule is emitted first because it must come before the
# stylesheet's other rules, otherwise browsers ignore it.
# The markup lines are intentionally not indented: this string is rendered
# through st.markdown, where leading whitespace is significant.
html_content = f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Kanit:wght@700&display=swap');
{css_style}
</style>
<div style='display: flex; flex-direction: column; align-items: flex-start;'>
<div style='display: flex; align-items: center;'>
<div style='width: 20px; height: 40px; background-color: green; margin-right: 1px;'></div>
<div style='width: 20px; height: 40px; background-color: red; margin-right: 1px;'></div>
<div style='width: 20px; height: 40px; background-color: yellow; margin-right: 20px;'></div>
<span style='font-size: 50px; font-weight: normal; font-family: "Kanit", sans-serif;'><strong>PROTAX</strong></span>
</div>
<div style='text-align: left; width: 100%;'>
<span style='font-size: 20px; font-weight: normal; color: #333; font-family: "Kanit", sans-serif'>
<strong>PRO</strong>phet & <strong>TA</strong>pex E<strong>X</strong>plorer</span>
</div>
</div>
"""

# Render the combined markup in the Streamlit page.
st.markdown(html_content, unsafe_allow_html=True)
# File upload interface
uploaded_file = st.file_uploader("Carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])

# Portuguese month abbreviations, as used in the spreadsheet column headers,
# mapped to two-digit month numbers.
month_dict = {
    'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
    'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
    'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
}

# Anomalies whose value is below this threshold are not reported.
MIN_ANOMALY_VALUE = 10_000_000.00


def convert_column_name(column_name):
    """Convert a ``"<month abbreviation>/<year>"`` header into ``"MM/YYYY"``.

    The label header ``'Rótulos de Linha'`` is returned unchanged. Headers that
    are not strings or contain no ``/`` yield ``None`` so that they are not
    parsed as dates later on. An unknown month abbreviation maps to ``"00"``.
    """
    if column_name == 'Rótulos de Linha':
        return column_name
    if not isinstance(column_name, str) or '/' not in column_name:
        return None
    month, year = column_name.split('/')[:2]
    year_digits = ''.join(filter(str.isdigit, year.strip()))
    month_number = month_dict.get(month.strip(), '00')
    return f"{month_number}/{year_digits}"


def _load_table(file):
    """Read the uploaded file into a DataFrame (CSV by extension, else Excel)."""
    # Compare case-insensitively so names such as "DATA.CSV" are handled too.
    if file.name.lower().endswith('.csv'):
        return pd.read_csv(file, quotechar='"', encoding='utf-8')
    return pd.read_excel(file)


def _build_monthly_table(df):
    """Reshape the raw sheet into one label column plus one column per month.

    Rows from index 2 onward and columns from index 9 up to (but excluding)
    the last one are kept; row 1 of the same column range supplies the
    headers. Missing cells become 0. The first kept column is named
    ``'Rotulo'``; the remaining headers are parsed as dates, and headers that
    cannot be parsed end up as ``NaT``.
    """
    new_df = df.iloc[2:, 9:-1].fillna(0)
    new_df.columns = df.iloc[1, 9:-1]
    # Drop a trailing " (<digits>)" suffix from the headers.
    new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)
    converted = [convert_column_name(col) for col in new_df.columns]
    parsed = list(pd.to_datetime(converted, errors='coerce'))
    # Name the label column by position rather than by its (NaT) key, so other
    # unparseable headers are not renamed along with it.
    parsed[0] = 'Rotulo'
    new_df.columns = parsed
    return new_df


def _detect_anomalies(df_clean):
    """Fit one Prophet model per row and collect the out-of-interval points.

    Returns a DataFrame with columns ``ds``, ``real`` and ``Group`` holding
    every observation that falls outside the model's 95% interval.
    """
    # The set of date columns is the same for every row, so compute it once.
    is_date_column = [isinstance(col, pd.Timestamp) for col in df_clean.columns]
    dates = [col for col, is_date in zip(df_clean.columns, is_date_column) if is_date]

    found = []
    for _, row in df_clean.iterrows():
        data = pd.DataFrame({
            'ds': dates,
            # Coerce to numbers so text cells do not break the comparison
            # below; unparseable cells become NaN and are filtered out.
            'y': pd.to_numeric(row[is_date_column], errors='coerce').values,
        })
        data = data[data['y'] > 0].reset_index(drop=True)
        if len(data) < 2:
            print(f"Skipping group {row['Rotulo']} because there are less than 2 non-zero observations.")
            continue
        try:
            model = Prophet(interval_width=0.95)
            model.fit(data)
        except ValueError as e:
            print(f"Skipping group {row['Rotulo']} due to error: {e}")
            continue
        # Only observed dates can be anomalies, so predict just those and join
        # on the date instead of relying on row order.
        forecast = model.predict(data[['ds']])
        scored = data.rename(columns={'y': 'real'}).merge(
            forecast[['ds', 'yhat_lower', 'yhat_upper']], on='ds', how='inner'
        )
        outside = (scored['real'] < scored['yhat_lower']) | (scored['real'] > scored['yhat_upper'])
        found.append(scored.loc[outside, ['ds', 'real']].assign(Group=row['Rotulo']))

    if not found:
        return pd.DataFrame(columns=['ds', 'real', 'Group'])
    return pd.concat(found, ignore_index=True)


def _format_anomalies(anomalies):
    """Rename, filter, sort and stringify the anomalies for display and TAPEX."""
    table = anomalies.rename(columns={"ds": "datetime", "real": "monetary value", "Group": "group"})
    table['monetary value'] = table['monetary value'].astype('float')
    table = table[table['monetary value'] >= MIN_ANOMALY_VALUE]
    # Sort while the values are still numeric; sorting the formatted strings
    # would order them lexicographically.
    table = table.sort_values(by=['monetary value'], ascending=False)
    table = table.assign(**{'monetary value': table['monetary value'].map('{:.2f}'.format)})
    return table.fillna('').astype(str)


def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
    """Translate ``text`` with the given seq2seq ``model`` and ``tokenizer``.

    ``source_lang`` and ``target_lang`` are accepted for readability at the
    call site but are not used; the direction is determined by ``model``.
    """
    # NOTE(review): the model is fed the raw text with default generation
    # settings - confirm against the model card whether a task prefix or a
    # longer maximum output length is expected.
    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def response(user_question, table_data):
    """Answer a Portuguese question about ``table_data`` in Portuguese.

    The question is translated to English, answered by the TAPEX model over
    the table, and the answer is translated back. Relies on the module-level
    models and tokenizers loaded once a file has been uploaded.
    """
    question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")
    encoding = tapex_tokenizer(table=table_data, query=[question_en], padding=True, return_tensors="pt", truncation=True)
    outputs = tapex_model.generate(**encoding)
    response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")


def _render_message(sender, message):
    """Render one chat entry: user messages on the left, bot on the right."""
    if sender == '👤':
        st.markdown(f"**👤 {message}**")
    elif sender == '🤖':
        # The bot text is model output placed into raw HTML, so escape it.
        safe_message = html.escape(str(message))
        st.markdown(f"<div style='text-align: right'>**🤖 {safe_message}**</div>", unsafe_allow_html=True)


if uploaded_file:
    # Recompute when there is no cached result or a different file is uploaded.
    file_key = (uploaded_file.name, getattr(uploaded_file, 'size', None))
    is_cached = (
        'all_anomalies' in st.session_state
        and st.session_state.get('all_anomalies_source') == file_key
    )
    if not is_cached:
        with st.spinner('Aplicando modelo de série temporal...'):
            df_clean = _build_monthly_table(_load_table(uploaded_file))
            all_anomalies = _format_anomalies(_detect_anomalies(df_clean))
            # Store the result in session state
            st.session_state['all_anomalies'] = all_anomalies
            st.session_state['all_anomalies_source'] = file_key

    # NOTE(review): the models below are loaded on every script rerun;
    # consider caching them (e.g. st.cache_resource) if the installed
    # Streamlit version provides it.
    # Load translation models
    pt_en_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
    en_pt_translator = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
    tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
    # Load TAPEX model
    tapex_model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
    tapex_tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")

    # Streamlit interface
    st.dataframe(st.session_state['all_anomalies'].head())

    # Chat history
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    user_question = st.text_input("Escreva sua questão aqui:", "")
    if user_question:
        # Only record the exchange here; it is rendered once by the history
        # loop below, so it is not shown twice.
        st.session_state['history'].append(('👤', user_question))
        bot_response = response(user_question, st.session_state['all_anomalies'])
        st.session_state['history'].append(('🤖', bot_response))

    if st.button("Limpar"):
        st.session_state['history'] = []

    for sender, message in st.session_state['history']:
        _render_message(sender, message)
else:
    st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")