File size: 7,714 Bytes
e2af017
 
96bff79
4e678dd
e2af017
e727bfc
e69523f
e727bfc
 
 
 
63c894a
e727bfc
535c2d9
63c894a
535c2d9
 
b375123
 
 
 
f2de8aa
96bff79
e727bfc
 
 
 
 
4e678dd
 
 
 
 
 
 
7212b4f
4e678dd
3b3e7df
4e678dd
 
 
 
 
 
c8e42cd
7212b4f
525bf5b
 
 
 
 
 
c8e42cd
525bf5b
138e488
 
 
 
 
 
 
 
4e678dd
138e488
 
 
 
4e678dd
138e488
 
 
 
4e678dd
138e488
 
4e678dd
138e488
 
4e678dd
138e488
 
 
 
 
 
 
 
378a4bc
7212b4f
 
378a4bc
e69523f
 
 
 
a09ca43
7e9ae9e
 
a09ca43
378a4bc
 
4e678dd
378a4bc
 
a09ca43
 
138e488
a09ca43
 
138e488
a09ca43
 
7e9ae9e
 
a09ca43
 
 
 
 
138e488
b6bad51
138e488
a09ca43
138e488
a09ca43
 
138e488
 
b6bad51
138e488
a09ca43
138e488
 
 
a09ca43
 
 
138e488
 
a09ca43
 
7e9ae9e
 
a09ca43
 
138e488
 
378a4bc
a09ca43
7e9ae9e
 
3b3e7df
49e71cf
 
f068007
378a4bc
c8e42cd
7212b4f
 
 
 
 
 
 
2fa7748
b375123
525bf5b
 
b375123
 
 
 
 
4e678dd
b375123
 
 
7212b4f
 
b6bad51
 
b375123
 
edfa911
 
 
b375123
 
 
4e678dd
edfa911
7212b4f
4e678dd
edfa911
 
 
4e678dd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import streamlit as st
import pandas as pd
from prophet import Prophet
import openai

# Load the external stylesheet so it can be inlined into the page markup.
# Explicit UTF-8: without it, open() uses the platform default encoding and
# can mangle non-ASCII characters in the CSS on some systems (e.g. Windows).
with open("style.css", "r", encoding="utf-8") as css:
    css_style = css.read()

# Combined markup: the stylesheet read above, a Google Fonts import for the
# title typeface, and the page header (three colored bars + the app name).
html_content = f"""
<style>
{css_style}
@import url('https://fonts.googleapis.com/css2?family=Kanit:wght@700&display=swap');
</style>
<div style='display: flex; flex-direction: column; align-items: flex-start;'>
    <div style='display: flex; align-items: center;'>
        <div style='width: 20px; height: 5px; background-color: green; margin-right: 0px;'></div>
        <div style='width: 20px; height: 5px; background-color: red; margin-right: 0px;'></div>
        <div style='width: 20px; height: 5px; background-color: yellow; margin-right: 18px;'></div>
        <span style='font-size: 38px; font-weight: normal; font-family: "Kanit", sans-serif;'>NOSTRADAMUS</span>
    </div>
</div>
"""

# Render the header; unsafe_allow_html is required for raw HTML/CSS.
st.markdown(html_content, unsafe_allow_html=True)

# Point the OpenAI client at the OpenRouter API (pre-1.0 openai SDK style).
openai.api_base = "https://openrouter.ai/api/v1"
# NOTE(review): credential placeholder hard-coded in source — load it from an
# environment variable or Streamlit secret before deploying.
openai.api_key = "<OPENROUTER_API_KEY>"
openai.default_headers = {
    "HTTP-Referer": "<https://huggingface.co/spaces/fschwartzer/streamlit_chatbot/>",  # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "<Streamlit Chatbot>",  # Optional. Site title for rankings on openrouter.ai.
}

# Function to interact with the model
def response(user_question, all_anomalies):
    """Ask the chat model a question about the anomalies table.

    Args:
        user_question: Free-text question typed by the user.
        all_anomalies: DataFrame that is rendered into the prompt as text.

    Returns:
        The model's answer as a string.
    """
    prompt = f"Considerando a seguinte tabela:\n{all_anomalies.to_string(index=False)}\nResponda a questão: {user_question}"
    # Bind the API result to its own name instead of shadowing this
    # function's name ('response'), which made the code confusing and
    # would break any recursive/reentrant use.
    completion = openai.ChatCompletion.create(
        model="deepseek/deepseek-r1:free",
        messages=[{"role": "user", "content": prompt}]
    )
    return completion.choices[0].message['content']

# Load and preprocess the data
def load_data(uploaded_file):
    """Read an uploaded CSV or XLSX file into a DataFrame.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        pd.DataFrame with the file contents.

    Raises:
        ValueError: If the filename ends in neither .csv nor .xlsx.
            (Previously the function fell through and raised an opaque
            UnboundLocalError on the missing ``df``.)
    """
    if uploaded_file.name.endswith('.csv'):
        return pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
    if uploaded_file.name.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    raise ValueError(f"Formato de arquivo não suportado: {uploaded_file.name}")

def preprocess_data(df):
    """Reshape the raw spreadsheet into a wide table of monthly values.

    Assumes a fixed export layout — TODO confirm against a real file:
    row index 1 holds the column headers, data begins at row index 2,
    and the useful columns span position 9 through the second-to-last.

    Returns a DataFrame whose first column is renamed 'Rotulo' (the row
    label) and whose remaining column labels are datetimes.
    """
    # Drop the two header rows and the leading/trailing columns; zero-fill gaps.
    new_df = df.iloc[2:,9:-1].fillna(0)
    # Header names live in the second row (index 1) of the original sheet.
    new_df.columns = df.iloc[1,9:-1]
    # Strip trailing " (N)" counters the export appends to header names.
    new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)
    # Portuguese month abbreviations -> zero-padded month numbers.
    month_dict = {
        'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
        'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
        'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
    }

    def convert_column_name(column_name):
        # Convert a header like 'Jan/2024' into 'MM/YYYY'.
        # Check if the column name is 'Rótulos de Linha' (the label column,
        # left untouched so the coercion below turns it into NaT).
        if column_name == 'Rótulos de Linha':
            return column_name

        # Otherwise, proceed to convert.
        # NOTE(review): assumes every other header contains a '/'; a header
        # without one raises IndexError — confirm against real exports.
        parts = column_name.split('/')
        month = parts[0].strip()
        year = parts[1].strip()

        # Clean year in case there are extra characters
        year = ''.join(filter(str.isdigit, year))

        # Get month number from the dictionary
        month_number = month_dict.get(month, '00')  # Default '00' if month is not found

        # Return formatted date string
        return f"{month_number}/{year}"

    new_df.columns = [convert_column_name(col) for col in new_df.columns]
    # Coerce headers to datetimes; the label column becomes NaT here...
    new_df.columns = pd.to_datetime(new_df.columns, errors='coerce')
    # ...and, being the first column, is immediately renamed to 'Rotulo'.
    new_df.rename(columns={new_df.columns[0]: 'Rotulo'}, inplace=True)
    df_clean = new_df.copy()
    return df_clean

# Cache the Prophet computation to avoid recomputing
@st.cache_data
def apply_prophet(df_clean):
    """Fit one Prophet model per row of *df_clean* and collect anomalies.

    An anomaly is an observed value falling outside the model's 95%
    prediction interval. Only anomalies with real value >= 10,000,000
    are kept; the result is returned as strings for display/prompting.

    Args:
        df_clean: Output of preprocess_data — a 'Rotulo' label column
            plus datetime-labelled value columns.

    Returns:
        pd.DataFrame with columns ['ds', 'real', 'group'] (as str),
        sorted by 'real' descending; empty DataFrame if nothing qualifies.
    """
    if df_clean.empty:
        st.error("DataFrame está vazio após o pré-processamento.")
        return pd.DataFrame()

    # The timestamp columns are the same for every row — compute them once
    # instead of rebuilding the list inside the loop.
    date_columns = [col for col in df_clean.columns if isinstance(col, pd.Timestamp)]

    # Accumulator for anomalies across all groups.
    all_anomalies = pd.DataFrame()

    for index, row in df_clean.iterrows():
        data = pd.DataFrame({
            'ds': date_columns,
            'y': row[date_columns].values
        })

        # Remove rows where 'y' is zero or missing.
        data = data[data['y'] > 0].dropna().reset_index(drop=True)

        # Prophet needs at least two observations to fit.
        if data.empty or len(data) < 2:
            continue

        try:
            model = Prophet(interval_width=0.95)
            model.fit(data)
        except ValueError:
            # Skip groups Prophet rejects (e.g. degenerate series).
            continue

        # Forecast twelve months beyond the observed range.
        future = model.make_future_dataframe(periods=12, freq='M')
        forecast = model.predict(future)

        # Align observed values with the forecast rows; future rows get None.
        real_values = list(data['y']) + [None] * (len(forecast) - len(data))
        forecast['real'] = real_values
        # Anomaly: observed value outside the 95% prediction interval.
        # .copy() so the 'group' assignment below writes to an independent
        # frame rather than a view (avoids SettingWithCopyWarning).
        anomalies = forecast[(forecast['real'] < forecast['yhat_lower']) |
                             (forecast['real'] > forecast['yhat_upper'])].copy()

        anomalies['group'] = row['Rotulo']
        all_anomalies = pd.concat([all_anomalies, anomalies[['ds', 'real', 'group']]], ignore_index=True)

    st.write(f"Concluída a aplicação do modelo de série temporal")
    st.write(all_anomalies.head())
    # Guard: with no anomalies the frame has no columns and the steps
    # below would raise KeyError.
    if all_anomalies.empty:
        return all_anomalies
    # Coerce to numeric BEFORE sorting so ordering is by value, not by
    # object-dtype comparison.
    all_anomalies['real'] = pd.to_numeric(all_anomalies['real'], errors='coerce')
    all_anomalies.sort_values(by=['real'], ascending=False, inplace=True)
    all_anomalies = all_anomalies[all_anomalies['real'] >= 10000000.00]
    all_anomalies = all_anomalies.astype(str)
    return all_anomalies

# Seed session-state entries on first run so later code can assume they exist.
for _key, _default in (('all_anomalies', pd.DataFrame()), ('history', [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Two tabs: the forecasting pipeline and the chatbot over its results.
tab1, tab2 = st.tabs(["Meta Prophet", "DeepSeek"])

# File-upload widget (placed outside the tabs, so it is shared by both).
uploaded_file = st.file_uploader("Carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])

with tab1:
    # Run the pipeline only once a file has actually been uploaded.
    if uploaded_file:
        df = load_data(uploaded_file)
        df_clean = preprocess_data(df)

        if df_clean.empty:
            st.warning("Não há dados válidos para processar.")
        elif st.session_state['all_anomalies'].empty:
            # First run only: Prophet results are kept in session state
            # so reruns of the script skip the expensive fit.
            all_anomalies = apply_prophet(df_clean)
            st.session_state['all_anomalies'] = all_anomalies

with tab2:
    # The chatbot only makes sense after tab1 has produced anomalies.
    stored = st.session_state.get('all_anomalies')
    if stored is not None and not stored.empty:
        user_question = st.text_input("Escreva sua questão aqui:", "")
        if user_question:
            answer = response(user_question, stored)
            st.session_state['history'].append(('👤', user_question))
            st.session_state['history'].append(('🤖', answer))

        # Replay the whole conversation on every rerun.
        for sender, message in st.session_state['history']:
            st.markdown(f"**{sender} {message}**")

        if st.button("Limpar histórico"):
            st.session_state['history'] = []
    else:
        st.warning("Por favor, processe os dados no Meta Prophet primeiro.")