File size: 6,801 Bytes
0404f33
96ede08
 
0404f33
 
 
70415ca
455ee5c
 
b59c2f5
0404f33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455ee5c
 
 
 
 
 
 
 
 
 
 
 
 
7542eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de6a3e2
 
7542eff
 
 
 
 
 
 
 
de6a3e2
7542eff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741bfcf
7542eff
455ee5c
 
 
 
8a6ebbd
 
 
 
 
 
455ee5c
0404f33
 
 
 
 
 
 
70415ca
0404f33
 
610a7ff
fc8098c
610a7ff
70415ca
0404f33
 
 
198d8ad
 
afe2864
ac49906
ef9b050
70415ca
 
0404f33
c8df5b8
0404f33
 
70415ca
 
0404f33
 
70415ca
 
 
 
 
 
13d9d91
ef9b050
 
 
 
 
 
 
 
 
 
 
63b0b33
c8df5b8
63b0b33
ef9b050
 
142db9e
c8df5b8
ef9b050
 
 
d4a8110
 
ef9b050
 
63b0b33
 
ef9b050
 
d4a8110
 
7a44486
cf3fa24
 
455ee5c
 
 
142db9e
455ee5c
cf3fa24
286dfa8
 
cf3fa24
 
286dfa8
 
 
cf3fa24
 
 
 
ef9b050
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from pandasai.llm import GoogleGemini
import streamlit as st
import os
import pandas as pd
from pandasai import SmartDataframe
from pandasai.responses.response_parser import  ResponseParser
from st_on_hover_tabs import on_hover_tabs
from ydata_profiling import ProfileReport
import google.generativeai as genai
import json

class StreamLitResponse(ResponseParser):
    """Response parser that draws each result straight into the Streamlit page.

    Every formatter reads the ``'value'`` entry of the result it is given,
    hands it to the matching Streamlit call and returns ``None``.
    """

    def __init__(self, context) -> None:
        super().__init__(context)

    def format_dataframe(self, result):
        """Render ``result['value']`` with ``st.dataframe``."""
        st.dataframe(result['value'])

    def format_plot(self, result):
        """Render ``result['value']`` with ``st.image``."""
        st.image(result['value'])

    def format_other(self, result):
        """Render ``result['value']`` with ``st.write``."""
        st.write(result['value'])

# The API key is read from the 'Gemini' environment variable; the lookup
# raises KeyError when the variable is not set.
gemini_api_key = os.environ['Gemini']
genai.configure(api_key=gemini_api_key)

# Generation settings passed to the report-writing model below.
generation_config = dict(
    temperature=0.2,
    top_p=0.95,
    max_output_tokens=5000,
)

# Model used for the free-text business report.
model = genai.GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-flash",
)

def calculate_kpis(df):
    """
    Calculates key performance indicators from a given transaction dataset.

    Args:
        df: Pandas DataFrame containing transaction data. The columns read
            are 'Price', 'Quantity', 'Description', 'Branch_Name',
            'Receipt No_' and 'Customer_Name'.

    Returns:
        A dict (ready to be serialised) with the keys:
            total_revenue: float, sum of Price * Quantity over all rows.
            top_five_products: list of the five 'Description' values with
                the largest summed 'Price', or an explanatory string when
                the dataset has fewer than five distinct products.
            average_order_value: float, mean of the per-receipt 'Price' sum.
            customer_purchase_frequency: float, mean number of distinct
                receipts per 'Customer_Name'.
            estimated_cltv: float, average_order_value *
                customer_purchase_frequency * 12.
            best_performing_branch: one-element list holding the
                'Branch_Name' with the largest summed 'Price', or an
                explanatory string when there is at most one branch.

    Raises:
        KeyError: if one of the columns listed above is missing.
    """

    # Total revenue: multiply row by row first, then sum. Without the
    # parentheses the price column is scaled by the overall quantity and
    # the result is a Series instead of a single number.
    total_revenue = float((df['Price'] * df['Quantity']).sum())

    # NOTE(review): the rankings and the order value below aggregate 'Price'
    # alone rather than Price * Quantity -- confirm whether 'Price' is a unit
    # price or a line total before relying on them as revenue figures.

    # Top five products. With exactly five products the ranking is still
    # meaningful, so only fewer than five falls back to the message.
    if df['Description'].nunique() >= 5:
        top_five_products = (
            df.groupby('Description')['Price'].sum().nlargest(5).index.tolist()
        )
    else:
        top_five_products = "there are less than 5 products in this dataset"

    # Best performing branch (only meaningful with more than one branch).
    if df['Branch_Name'].nunique() > 1:
        best_branch = (
            df.groupby('Branch_Name')['Price'].sum().nlargest(1).index.tolist()
        )
    else:
        best_branch = "there is only one branch in this dataset"

    # Average Order Value (AOV): mean of the per-receipt totals.
    aov = float(df.groupby('Receipt No_')['Price'].sum().mean())

    # Customer purchase frequency: distinct receipts per customer, averaged.
    # 'Customer_Name' is used as the customer identifier.
    customer_purchase_frequency = float(
        df.groupby('Customer_Name')['Receipt No_'].nunique().mean()
    )

    # Simple CLTV model based on AOV and purchase frequency; the factor 12
    # is the original "annual value" assumption.
    estimated_cltv = aov * customer_purchase_frequency * 12

    return {
        "total_revenue": total_revenue,
        "top_five_products": top_five_products,
        "average_order_value": aov,
        "customer_purchase_frequency": customer_purchase_frequency,
        "estimated_cltv": estimated_cltv,
        "best_performing_branch": best_branch,
    }


def get_pandas_profile(df):
    """Profile ``df`` and return a trimmed-down version of the report.

    A ``ProfileReport`` titled "Profiling Report" is built for ``df``,
    serialised to JSON and parsed back into a dict; only the 'analysis',
    'table', 'correlations', 'alerts' and 'sample' sections are returned.
    A section missing from the report raises ``KeyError``.
    """
    report = ProfileReport(df, title="Profiling Report")
    full_profile = json.loads(report.to_json())

    wanted_sections = ('analysis', 'table', 'correlations', 'alerts', 'sample')
    trimmed = {}
    for section in wanted_sections:
        trimmed[section] = full_profile[section]
    return trimmed
    
def generateResponse(dataFrame, prompt):
    """Send ``prompt`` to a pandasai agent wrapped around ``dataFrame``.

    The agent is a ``SmartDataframe`` configured with a ``GoogleGemini`` LLM
    (using the module-level API key) and the ``StreamLitResponse`` parser.
    Returns whatever the agent's ``chat`` call returns.
    """
    agent_config = {
        "llm": GoogleGemini(api_key=gemini_api_key),
        "response_parser": StreamLitResponse,
    }
    return SmartDataframe(dataFrame, config=agent_config).chat(prompt)

# Page title, custom stylesheet and tagline.
st.write("# Brave Retail Insights")
# Read the stylesheet inside a context manager so the file handle is closed
# straight away instead of lingering until garbage collection.
with open('./style.css') as css_file:
    st.markdown('<style>' + css_file.read() + '</style>', unsafe_allow_html=True)
st.write("##### Engage in insightful conversations with your data through powerful visualizations")

# Sidebar: branding plus the hover tabs that select the active page.
with st.sidebar:
    st.title("Brave Retail Insights")
    st.sidebar.image("IMG_1181.jpeg", use_column_width=True)
    tabs = on_hover_tabs(tabName=['Chat', 'Reports'],
                         iconName=['chat', 'dashboard'], default_choice=0)
        


# Dataset backing both tabs.
uploaded_file = "bon_marche.csv"
#uploaded_file = "healthcare_dataset.csv"

if tabs == 'Chat':
    df = pd.read_csv(uploaded_file)
    st.subheader("Brave Retail Chat")
    st.write("Get visualizations and analysis from our Gemini powered agent")

    # Show the first rows so the user can see what the data looks like.
    with st.expander("Preview"):
        st.write(df.head())

    user_input = st.text_input("Type your message here", placeholder="Ask me about your data")
    if user_input:
        answer = generateResponse(dataFrame=df, prompt=user_input)
        # The StreamLitResponse formatters draw the result themselves and
        # return None, so the answer is presumably None here (verify against
        # pandasai); only echo it when there is something left to show.
        if answer is not None:
            st.write(answer)

elif tabs == 'Reports':
    df = pd.read_csv(uploaded_file)

    st.subheader("Reports")
    st.write("Filter by Branch Name or Product to generate report")

    # Filtering interface. Only branch filtering is active; product-level
    # filtering is currently disabled.
    st.write("Filtering Options")
    branch_names = df['Branch_Name'].unique().tolist()
    selected_branches = st.multiselect('Select Branch(es) Name', branch_names, default=branch_names)

    if st.button('Apply Filters and Generate report'):
        # An empty selection means "no branch filter": report on all rows.
        if selected_branches:
            filtered_df = df[df['Branch_Name'].isin(selected_branches)]
        else:
            filtered_df = df.copy()

        # Preview and report are produced for every click, whether or not a
        # branch filter was applied.
        st.write("Filtered DataFrame")
        with st.expander("Preview"):
            st.write(filtered_df.head())
        with st.spinner("Generating Report, Please Wait...."):
            prompt = """
You are an expert business analyst. Analyze the following data and generate a comprehensive and insightful business report, including appropriate key perfomance indicators and reccomendations.

data:
"""  +  str(calculate_kpis(filtered_df)) + str(get_pandas_profile(filtered_df))

            response = model.generate_content(prompt)
            st.markdown(response.text)

            # Charts follow the written report. The agent's response parser
            # renders them as a side effect, so a returned value is only
            # written out when it is not None.
            chart_prompts = (
                "pie chart of revenue by branch",
                "bar chart of of most popular products",
            )
            for chart_prompt in chart_prompts:
                chart = generateResponse(filtered_df, chart_prompt)
                if chart is not None:
                    st.markdown(chart)
            st.success("Report Generated!")
    else:
        # Nothing generated yet: tell the user how to get the report.
        st.write("Filtered DataFrame")
        st.write("Click 'Apply Filters' to see the filtered data.")