Lamp Socrates committed on
Commit
0f52168
·
1 Parent(s): aed0940

Initial commit

Browse files
Files changed (3) hide show
  1. README.md +11 -4
  2. app.py +230 -0
  3. requirements.txt +31 -0
README.md CHANGED
@@ -1,13 +1,20 @@
1
  ---
2
- title: Hf Streamlit Plodcw Group27
3
  emoji: 👀
4
- colorFrom: indigo
5
- colorTo: yellow
6
  sdk: streamlit
7
- sdk_version: 1.35.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
 
11
  ---
12
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: Hf Streamlit Cw Group27
3
  emoji: 👀
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: streamlit
7
+ sdk_version: 1.34.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ python_version: 3.11.5
12
+ short_description: Sample space for group coursework for NLP
13
  ---
14
 
15
+
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
17
+
18
+
19
+ ## Setting up GitHub Actions
20
+ https://huggingface.co/docs/hub/en/spaces-github-actions
app.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
4
+ import pandas as pd
5
+ from pprint import pprint
6
+
7
+
8
@st.cache_resource()
def load_trained_model():
    """Build the token-classification pipeline used by the app.

    Loads the tokenizer and the model from the same Hugging Face checkpoint,
    prints the model's ``id2label`` mapping to stdout, and wraps both in a
    ``"ner"`` pipeline.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    checkpoint = "LampOfSocrates/bert-cased-plodcw-sourav"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTokenClassification.from_pretrained(checkpoint)

    # Surface the label set in the server log for quick inspection.
    id2label = model.config.id2label
    print(f"Can recognise the following labels {id2label}")

    return pipeline("ner", model=model, tokenizer=tokenizer)
22
+
23
+
24
@st.cache_data()
def load_plod_cw_dataset():
    """Load the ``surrey-nlp/PLOD-CW`` dataset via ``datasets.load_dataset``.

    Returns:
        Whatever ``load_dataset("surrey-nlp/PLOD-CW")`` returns.
    """
    # Imported inside the function, as in the original, rather than at module top.
    from datasets import load_dataset

    return load_dataset("surrey-nlp/PLOD-CW")
29
+
30
def load_random_examples(dataset_name, num_examples=5):
    """Pick one random row from the PLOD-CW test split.

    NOTE(review): both parameters are currently ignored. The dataset always
    comes from ``load_plod_cw_dataset()`` and exactly one row is sampled,
    whatever ``num_examples`` says. Confirm whether that is intended.

    Args:
        dataset_name (str): Accepted for interface compatibility; unused.
        num_examples (int): Accepted for interface compatibility; unused.

    Returns:
        pd.DataFrame: Built from the ``(tokens, ner_tags)`` pair of the
        sampled row.
    """
    test_frame = pd.DataFrame(load_plod_cw_dataset()['test'])

    # A single row is drawn on every call.
    picked = test_frame.sample(n=1)

    return pd.DataFrame((picked.tokens, picked.ner_tags))
53
+
54
+
55
def render_entities(tokens, entities):
    """Show a themed two-column table pairing tokens with entities.

    Injects a CSS theme, writes a title and a short description, then passes
    ``tokens`` and ``entities`` to ``st.table`` as the "Token" and "Entity"
    columns.
    """
    # Theme for the page body and the table (whitespace in the CSS is cosmetic).
    theme_css = """
    <style>
    body {
    font-family: 'Arial', sans-serif;
    background-color: #f0f0f5;
    color: #333333;
    }
    table {
    width: 100%;
    border-collapse: collapse;
    }
    th, td {
    padding: 12px;
    text-align: left;
    border-bottom: 1px solid #dddddd;
    }
    th {
    background-color: #4CAF50;
    color: white;
    width: 16.66%;
    }
    tr:hover {
    background-color: #f5f5f5;
    }
    td {
    width: 16.66%;
    }
    </style>
    """
    st.markdown(theme_css, unsafe_allow_html=True)

    # Heading and explanatory line above the table.
    st.title("Model predicted Token vs Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    st.table({"Token": tokens, "Entity": entities})
98
+
99
def render_random_examples():
    """Show a random PLOD-CW example in a themed table.

    The example is kept in ``st.session_state['random_examples']``. It is
    loaded when the key is absent and reloaded whenever the button is pressed.
    """
    # Theme for the page body and the table (whitespace in the CSS is cosmetic).
    theme_css = """
    <style>
    body {
    font-family: 'Arial', sans-serif;
    background-color: #f0f0f5;
    color: #333333;
    }
    table {
    width: 100%;
    border-collapse: collapse;
    }
    th, td {
    padding: 12px;
    text-align: left;
    border-bottom: 1px solid #dddddd;
    }
    th {
    background-color: #4CAF50;
    color: white;
    width: 16.66%;
    }
    tr:hover {
    background-color: #f5f5f5;
    }
    td {
    width: 16.66%;
    }
    </style>
    """
    st.markdown(theme_css, unsafe_allow_html=True)

    st.title("Random Examples from PLOD-CW")
    st.write("This table shows 1 random examples from the PLOD-CW dataset in a cool and chilled theme.")

    # The button is evaluated first so it is always drawn; a click or a
    # missing cache entry both lead to exactly one (re)load.
    refresh_clicked = st.button('Show another set of random examples')
    if refresh_clicked or 'random_examples' not in st.session_state:
        st.session_state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")

    st.table(st.session_state['random_examples'])
150
def predict_using_trained(sentence):
    """Run the cached NER pipeline on ``sentence`` and return its output.

    Args:
        sentence: Input forwarded unchanged to the pipeline.

    Returns:
        Whatever the pipeline from ``load_trained_model()`` returns.
    """
    ner = load_trained_model()
    return ner(sentence)
156
+
157
# Background colour used to highlight each predicted label; labels missing
# from this map fall back to 'lightgray'.
_LABEL_COLORS = {
    'B-LF': 'lightblue',
    'B-O': 'lightgreen',
    'B-AC': 'lightcoral',
    'I-LF': 'lightyellow'
}


def _entity_html(text, entities):
    """Return ``text`` as an HTML fragment with predicted spans highlighted.

    Args:
        text (str): The raw sentence the user typed.
        entities: Pipeline predictions; each item is read for its
            ``'start'``, ``'end'`` and ``'entity'`` keys.

    Returns:
        str: A ``<div>`` in which each predicted span is wrapped in a coloured
        ``<div>`` and spaces in the text between spans become ``<br>``.
    """
    from html import escape

    pieces = ["<div>"]
    cursor = 0
    for entity in entities:
        start, end = entity['start'], entity['end']
        color = _LABEL_COLORS.get(entity['entity'], 'lightgray')

        # User text is escaped before it is embedded: the fragment is rendered
        # with unsafe_allow_html=True, so raw input could otherwise inject markup.
        pieces.append(escape(text[cursor:start]).replace(" ", "<br>"))
        pieces.append(
            f'<div style="background-color: {color}; padding: 5px; '
            f'border-radius: 3px; margin: 5px 0;">{escape(text[start:end])}</div>'
        )
        cursor = end

    # Whatever follows the last predicted span.
    pieces.append(escape(text[cursor:]).replace(" ", "<br>"))
    pieces.append("</div>")
    return "".join(pieces)


def prep_page():
    """Render the interactive NER page.

    Shows a text area. When the user has entered text, runs the cached
    pipeline on it, prints the raw predictions to stdout, renders the
    highlighted sentence, and shows a token/label table. The random-examples
    section is then rendered.
    """
    model = load_trained_model()

    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    text = st.text_area("Enter your sentence here:")

    if text:
        st.write("Entities recognized:")
        entities = model(text)

        # Raw predictions go to the server log for debugging.
        pprint(entities)

        st.markdown(_entity_html(text, entities), unsafe_allow_html=True)

        # render_entities expects parallel per-token sequences. Passing the raw
        # sentence and the list of prediction dicts (as before) repeated the
        # whole sentence on every row next to an unformatted dict.
        words = [entity['word'] for entity in entities]
        labels = [entity['entity'] for entity in entities]
        render_entities(words, labels)

    # NOTE(review): assumed to run regardless of whether text was entered;
    # the original nesting is not recoverable from the diff view. Confirm.
    render_random_examples()
215
+
216
+
217
+
218
if __name__ == '__main__':
    # Two modes: with an `api` query parameter, answer with the raw
    # predictions for `sentence`; otherwise render the interactive page.
    query_params = st.query_params
    if 'api' in query_params:
        sentence = query_params.get('sentence')
        if not sentence:
            # A missing `sentence` would reach the pipeline as None and raise;
            # report the problem to the caller instead.
            st.error("Missing required query parameter: sentence")
        else:
            entities = predict_using_trained(sentence)
            response = {"sentence": sentence, "entities": entities}
            pprint(response)

            st.write(response)
    else:
        prep_page()
230
+
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.2.0
2
+ tensorflow==2.15.0
3
+ datasets==2.18.0
4
+ torchtext==0.17.0
5
+ torchvision==0.17.0
6
+ torchsummary==1.5.1
7
+ accelerate==0.26.0
8
+ gensim==4.3.2
9
+ transformers==4.39.3
10
+ pynvml==11.5.0
11
+ seqeval==1.2.2
12
+ triton==2.2.0
13
+ jupyter==1.0.0
14
+ jupyterlab-git==0.50.0
15
+ urllib3<2
16
+ scikit-learn
17
+ scipy==1.10.1
18
+ numpy
19
+ fastai==2.7.14
20
+ timm==0.9.12
21
+ tensorboard
22
+ albumentations==1.4.3
23
+ seaborn
24
+ tqdm==4.66.2
25
+ nbdime
26
+ matplotlib
27
+ opencv-python
28
+ Keras-Preprocessing==1.1.2
29
+ flask==2.1.0
30
+ Werkzeug==2.2.2
31
+ wandb==0.17.0