ababio commited on
Commit
357410f
·
verified ·
1 Parent(s): 4cbddaa

Create app.y

Browse files
Files changed (1) hide show
  1. app.y +178 -0
app.y ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Union
2
+ from google.cloud import aiplatform
3
+ from google.protobuf import json_format
4
+ from google.protobuf.struct_pb2 import Value
5
+ import os
6
+ import re
7
+ import pandas as pd
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ import streamlit as st
11
+ import nltk
12
+ import json
13
+ import tempfile
14
+
15
+ # process of getting credentials
16
+ def get_credentials():
17
+ creds_json_str = os.getenv("JSONSTR") # get json credentials stored as a string
18
+ if creds_json_str is None:
19
+ raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
20
+
21
+ # create a temporary file
22
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
23
+ temp.write(creds_json_str) # write in json format
24
+ temp_filename = temp.name
25
+
26
+ return temp_filename
27
+
28
+ # pass
29
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= get_credentials()
30
+
31
+
32
+
33
+ max_seq_length = 2048
34
+ dtype = None
35
+ load_in_4bit = True
36
+
37
+ # Check if 'punkt' is already downloaded, otherwise download it
38
+ try:
39
+ nltk.data.find('tokenizers/punkt')
40
+ except LookupError:
41
+ nltk.download('punkt')
42
+
43
+ text_split_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
44
+
45
+
46
+ def predict_custom_trained_model_sample(
47
+ project: str,
48
+ endpoint_id: str,
49
+ instances: Union[Dict, List[Dict]],
50
+ location: str = "us-east4",
51
+ api_endpoint: str = "us-east4-aiplatform.googleapis.com",
52
+ ) -> List[str]:
53
+ """
54
+ `instances` can be either single instance of type dict or a list
55
+ of instances.
56
+ """
57
+ client_options = {"api_endpoint": api_endpoint}
58
+ client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
59
+ instances = instances if isinstance(instances, list) else [instances]
60
+ instances = [
61
+ json_format.ParseDict(instance_dict, Value()) for instance_dict in instances
62
+ ]
63
+ parameters_dict = {}
64
+ parameters = json_format.ParseDict(parameters_dict, Value())
65
+ endpoint = client.endpoint_path(
66
+ project=project, location=location, endpoint=endpoint_id
67
+ )
68
+ response = client.predict(
69
+ endpoint=endpoint, instances=instances, parameters=parameters
70
+ )
71
+ predictions_list = []
72
+ predictions = response.predictions
73
+ for prediction in predictions:
74
+ if isinstance(prediction, str):
75
+ clean_prediction = re.sub(r'(\n|Origin|###|Optimization|Response:)', '', prediction)
76
+ split_predictions = clean_prediction.split()
77
+ predictions_list.extend(split_predictions)
78
+ else:
79
+ print(" prediction (unknown type, skipping):", prediction)
80
+ return [emotion for emotion in predictions_list if emotion in d_emotion.values()]
81
+
82
+
83
+ d_emotion = {0: 'admiration', 1: 'amusement', 2: 'anger', 3: 'annoyance', 4: 'approval', 5: 'caring', 6: 'confusion',
84
+ 7: 'curiosity', 8: 'desire', 9: 'disappointment', 10: 'disapproval', 11: 'disgust', 12: 'embarrassment',
85
+ 13: 'excitement', 14: 'fear', 15: 'gratitude', 16: 'grief', 17: 'joy', 18: 'love', 19: 'nervousness',
86
+ 20: 'optimism', 21: 'pride', 22: 'realization', 23: 'relief', 24: 'remorse', 25: 'sadness', 26: 'surprise',
87
+ 27: 'neutral'}
88
+
89
+ st.write("Write or paste any number of document texts to analyse the emotion percentage with your document")
90
+
91
+ user_input = st.text_area('Enter Text to Analyze')
92
+ button = st.button("Analyze")
93
+
94
+ if user_input and button:
95
+ alpaca_prompt = """Below is a conversation between a human and an AI agent. write a response based on the input.
96
+ ### Instruction:
97
+ predict the emotion word or words
98
+ ### Input:
99
+ {}
100
+ ### Response:
101
+ """
102
+
103
+ instances = []
104
+ input_array = text_split_tokenizer.tokenize(user_input)
105
+ for sentence in input_array:
106
+ formatted_input = alpaca_prompt.format(sentence.strip())
107
+ instance = {
108
+ "inputs": formatted_input,
109
+ "parameters": {
110
+ "max_new_tokens": 4,
111
+ "temperature": 0.00001,
112
+ "top_p": 0.9,
113
+ "top_k": 10
114
+ }
115
+ }
116
+ instances.append(instance)
117
+
118
+ predictions = predict_custom_trained_model_sample(
119
+ project=os.environ["project"],
120
+ endpoint_id=os.environ["endpoint_id"],
121
+ location=os.environ["location"],
122
+ instances=instances
123
+ )
124
+
125
+ emotion_counts = pd.Series(predictions).value_counts(normalize=True).reset_index()
126
+ emotion_counts.columns = ['Emotion', 'Percentage']
127
+ emotion_counts['Percentage'] *= 100 # Convert to percentage
128
+ fig_pie = px.pie(emotion_counts, values='Percentage', names='Emotion', title='Percentage of Emotions in Given Text')
129
+ fig_pie.update_traces(textposition='inside', textinfo='percent+label')
130
+
131
+ @st.cache_data
132
+ def get_emotion_chart(predictions):
133
+ emotion_counts = pd.Series(predictions).value_counts().reset_index()
134
+ emotion_counts.columns = ['Emotion', 'Count']
135
+ fig_bar = go.Figure()
136
+ fig_bar.add_trace(go.Bar(
137
+ x=emotion_counts['Emotion'],
138
+ y=emotion_counts['Count'],
139
+ marker_color='indianred'
140
+ ))
141
+ fig_bar.update_layout(title='Count of Each Emotion in Given Text', xaxis_title='Emotion', yaxis_title='Count')
142
+ return fig_bar
143
+
144
+ fig_bar = get_emotion_chart(predictions)
145
+
146
+
147
+ @st.cache_data
148
+ def get_emotion_heatmap(predictions):
149
+ # Create a matrix for heatmap
150
+
151
+ # Count occurrences of each emotion
152
+ emotion_counts = pd.Series(predictions).value_counts().reset_index()
153
+ emotion_counts.columns = ['Emotion', 'Count']
154
+
155
+ heatmap_matrix = pd.DataFrame(0, index=d_emotion.values(), columns=d_emotion.values())
156
+ for index, row in emotion_counts.iterrows():
157
+ heatmap_matrix.at[row['Emotion'], row['Emotion']] = row['Count']
158
+
159
+ fig = go.Figure(data=go.Heatmap(
160
+ z=heatmap_matrix.values,
161
+ x=heatmap_matrix.columns.tolist(),
162
+ y=heatmap_matrix.index.tolist(),
163
+ text=heatmap_matrix.values,
164
+ hovertemplate="Count: %{text}",
165
+ colorscale='Viridis'
166
+ ))
167
+ fig.update_layout(title='Emotion Heatmap', xaxis_title='Predicted Emotion', yaxis_title='Predicted Emotion')
168
+ return fig
169
+
170
+ fig_dist = get_emotion_heatmap(predictions)
171
+
172
+ tab1, tab2, tab3 = st.tabs(["Emotion Analysis", "Emotion Counts Distribution", "Heatmap"])
173
+ with tab1:
174
+ st.plotly_chart(fig_pie)
175
+ with tab2:
176
+ st.plotly_chart(fig_bar)
177
+ with tab3:
178
+ st.plotly_chart(fig_dist)