File size: 7,721 Bytes
711cd18
 
 
 
 
 
357410f
 
 
 
 
 
 
 
711cd18
357410f
711cd18
357410f
 
 
e0d7678
357410f
711cd18
 
357410f
 
 
711cd18
e0d7678
357410f
 
 
 
 
 
 
 
 
 
 
 
 
711cd18
357410f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711cd18
357410f
 
 
 
 
 
 
 
06c4de1
 
31f56d9
a106368
abe468e
e0d7678
06c4de1
 
 
 
 
357410f
06c4de1
 
 
 
6b3e786
06c4de1
6b3e786
 
 
 
0c23cf6
357410f
 
6b3e786
 
 
 
 
 
711cd18
 
357410f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711cd18
e0d7678
711cd18
e0d7678
357410f
 
 
 
 
e0d7678
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import streamlit as st
import numpy as np
import nltk
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
import os
import re
import tempfile

# Function to get credentials from environment variable and create a temporary file
def get_credentials():
    """Write the credentials JSON from the ``JSONSTR`` environment variable to a temp file.

    Returns:
        The path of the newly created ``.json`` temporary file.

    Raises:
        ValueError: If ``JSONSTR`` is unset or empty.
    """
    creds_json_str = os.getenv("JSONSTR")  # JSON credentials stored as a string
    if not creds_json_str:
        # Name the variable that is actually read so the message is actionable.
        raise ValueError(
            "JSONSTR not found in environment "
            "(expected the Google application credentials JSON as a string)"
        )

    # delete=False: the file has to outlive this function, because the caller
    # only receives its path and the contents are read later.
    with tempfile.NamedTemporaryFile(
        mode="w+", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename

# Export the path of the temp credentials file written by get_credentials() through
# GOOGLE_APPLICATION_CREDENTIALS. This runs at import time, so a missing JSONSTR
# variable aborts the script here.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()

# NOTE(review): these three settings are not referenced anywhere in the visible code;
# presumably left over from a model-loading configuration — confirm before removing.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Check if 'punkt' is already downloaded, otherwise download it
# (nltk.data.find raises LookupError when the resource is not available locally).
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# English Punkt tokenizer, used further down to split the user's text into sentences.
text_split_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

# Function to predict emotions using the custom trained model
def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-east4",
    api_endpoint: Union[str, None] = None,
) -> List[str]:
    """Query a deployed model endpoint and return the recognised emotion labels.

    Args:
        project: Google Cloud project used to build the endpoint path.
        endpoint_id: ID of the endpoint to call.
        instances: One instance dict, or a list of them, to send for prediction.
        location: Region used to build the endpoint path.
        api_endpoint: Host name of the regional API service. When omitted it is
            derived from ``location`` (``"<location>-aiplatform.googleapis.com"``)
            so the host always matches the region of the endpoint path; with the
            default location this is the previous default host.

    Returns:
        Every whitespace-separated token of the string predictions that is one
        of the labels in ``d_emotion``, in response order.
    """
    if api_endpoint is None:
        # A fixed us-east4 host would not match endpoints in other regions.
        api_endpoint = f"{location}-aiplatform.googleapis.com"

    client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    if not isinstance(instances, list):
        instances = [instances]
    proto_instances = [
        json_format.ParseDict(instance_dict, Value()) for instance_dict in instances
    ]
    # No request-level parameters are sent; generation settings travel inside
    # each instance.
    parameters = json_format.ParseDict({}, Value())

    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=proto_instances, parameters=parameters
    )

    # Build the label lookup once instead of scanning the dict values per token.
    valid_emotions = set(d_emotion.values())

    emotions: List[str] = []
    for prediction in response.predictions:
        if not isinstance(prediction, str):
            print(" prediction (unknown type, skipping):", prediction)
            continue
        # Drop newlines and the prompt/marker fragments echoed by the model,
        # then keep only tokens that are known emotion labels.
        clean_prediction = re.sub(
            r'(\n|Origin|###|Optimization|Response:)', '', prediction
        )
        emotions.extend(
            token for token in clean_prediction.split() if token in valid_emotions
        )
    return emotions

# Emotion labels keyed by their integer index (0-27); the values are the only
# words accepted as valid predictions.
d_emotion = dict(enumerate((
    'admiration',
    'amusement',
    'anger',
    'annoyance',
    'approval',
    'caring',
    'confusion',
    'curiosity',
    'desire',
    'disappointment',
    'disapproval',
    'disgust',
    'embarrassment',
    'excitement',
    'fear',
    'gratitude',
    'grief',
    'joy',
    'love',
    'nervousness',
    'optimism',
    'pride',
    'realization',
    'relief',
    'remorse',
    'sadness',
    'surprise',
    'neutral',
)))

# Page header: two spacer lines, the title with a rainbow divider, a one-line
# usage hint and a row of emoji shortcodes.
for _spacer in (" ", " "):
    st.write(_spacer)

st.header(
    'Sentiment: Emotion Analyses',
    divider='rainbow',
)

_usage_hint = 'Write or paste any number of document texts to analyse the emotion percentage with your document'
_emoji_row = ':sunglasses: :smile: :angry: :disappointed: :fearful: :rage:  :weary:	:cry: :sweat_smile: :neutral_face: :blush: :heart_eyes: :innocent: :satisfied: :joy:'
st.write(_usage_hint)
st.write(_emoji_row)

# # Define the sample text
# sample_text = ("Once, in a small village nestled in the rolling hills of Tuscany, lived an elderly woman named Isabella. "
#                "She had spent her entire life in this village, raising her children and caring for her garden, which was the most "
#                "beautiful in the region. Her husband, Marco, had passed away many years ago, leaving her with a heart full of memories "
#                "and a small, quaint house that overlooked the lush vineyards.")

# # Add button to fill in sample text
# if st.button("Use Sample Text"):
#     user_input = st.text_input(label="sample", value=sample_text, label_visibility="hidden")
# else:
# The selectbox has to be created before the text area: its value is used to
# pre-fill the text area, and reading `option` before this call raised NameError.
# NOTE(review): the label and choices look like placeholder values; presumably
# they are meant to become selectable sample texts — confirm.
option = st.selectbox(
    "How would you like to be contacted?",
    ("Email", "Home phone", "Mobile phone"))

st.write("You selected:", option)

# `value` (lowercase) is the text_area keyword for the initial content; fall
# back to an empty box when nothing is selected.
user_input = st.text_area('Enter Text to Analyze', value=option or "")

button = st.button("Analyze")

@st.cache_data
def get_emotion_chart(predictions):
    """Build a bar chart with the number of occurrences of each emotion label.

    Args:
        predictions: List of emotion label strings.

    Returns:
        A plotly figure with one bar per distinct label.
    """
    emotion_counts = pd.Series(predictions).value_counts().reset_index()
    emotion_counts.columns = ['Emotion', 'Count']
    fig_bar = go.Figure()
    fig_bar.add_trace(go.Bar(
        x=emotion_counts['Emotion'],
        y=emotion_counts['Count'],
        marker_color='indianred'
    ))
    fig_bar.update_layout(title='Count of Each Emotion in Given Text', xaxis_title='Emotion', yaxis_title='Count')
    return fig_bar


@st.cache_data
def get_emotion_heatmap(predictions):
    """Build a heatmap whose diagonal holds the count of each emotion label.

    The matrix has one row and one column per label in ``d_emotion``; only the
    diagonal cell of a predicted label is non-zero.

    Args:
        predictions: List of emotion label strings (all present in ``d_emotion``).

    Returns:
        A plotly figure containing the heatmap.
    """
    labels = list(d_emotion.values())
    heatmap_matrix = pd.DataFrame(0, index=labels, columns=labels)
    for emotion, count in pd.Series(predictions).value_counts().items():
        heatmap_matrix.at[emotion, emotion] = count

    fig = go.Figure(data=go.Heatmap(
        z=heatmap_matrix.values,
        x=heatmap_matrix.columns.tolist(),
        y=heatmap_matrix.index.tolist(),
        text=heatmap_matrix.values,
        hovertemplate="Count: %{text}",
        colorscale='Viridis'
    ))
    fig.update_layout(title='Emotion Heatmap', xaxis_title='Predicted Emotion', yaxis_title='Predicted Emotion')
    return fig


# Run the analysis only for real text: whitespace-only input would yield no
# sentences and therefore an empty prediction request.
if button and user_input.strip():
    # Prompt template sent to the model; `{}` receives one sentence.
    alpaca_prompt = """Below is a conversation between a human and an AI agent. write a response based on the input.
        ### Instruction:
        predict the emotion word or words
        ### Input:
        {}
        ### Response:
        """
    # One request instance per sentence of the input text.
    instances = [
        {
            "inputs": alpaca_prompt.format(sentence.strip()),
            "parameters": {
                "max_new_tokens": 4,
                "temperature": 0.00001,
                "top_p": 0.9,
                "top_k": 10
            }
        }
        for sentence in text_split_tokenizer.tokenize(user_input)
    ]

    predictions = predict_custom_trained_model_sample(
        project=os.environ["project"],
        endpoint_id=os.environ["endpoint_id"],
        location=os.environ["location"],
        instances=instances
    )

    if not predictions:
        # Nothing to plot: say so instead of rendering three empty charts.
        st.warning("No recognizable emotions were detected in the given text.")
    else:
        # Share of each emotion among all predicted labels, as a percentage.
        emotion_counts = pd.Series(predictions).value_counts(normalize=True).reset_index()
        emotion_counts.columns = ['Emotion', 'Percentage']
        emotion_counts['Percentage'] *= 100  # Convert to percentage
        fig_pie = px.pie(emotion_counts, values='Percentage', names='Emotion', title='Percentage of Emotions in Given Text')
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')

        fig_bar = get_emotion_chart(predictions)
        fig_heatmap = get_emotion_heatmap(predictions)

        tab1, tab2, tab3 = st.tabs(["Emotion Analysis", "Emotion Counts Distribution", "Heatmap"])
        with tab1:
            st.plotly_chart(fig_pie)
        with tab2:
            st.plotly_chart(fig_bar)
        with tab3:
            st.plotly_chart(fig_heatmap)
elif button:
    st.warning("Please enter some text to analyze.")