David committed on
Commit
73429da
·
1 Parent(s): 612fb43
Files changed (4) hide show
  1. README.md +5 -5
  2. app.py +107 -0
  3. ner_v0_model.ipynb +197 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Token Classification Base
3
- emoji: 💻
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 3.12.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: Token Classification
3
+ emoji: 🦀
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.11.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+
4
+ import requests
5
+
6
+ from spacy import displacy
7
+ import streamlit as st
8
+
9
+ os.system("python -m spacy download en_core_web_md")
10
+ import spacy
11
+
12
+ # entities group method
13
+ # https://huggingface.co/spaces/crabz/sk-ner/blob/main/app.py
14
+
15
+ options = {"ents": ["Observation",
16
+ "Evaluation"],
17
+ "colors": {
18
+ "Observation": "#9bddff",
19
+ "Evaluation": "#f08080",
20
+ }
21
+
22
+ }
23
+
24
+ nlp = spacy.load("en_core_web_md")
25
+
26
+
27
def postprocess(classifications):
    """Collapse token-level BIO predictions into entity spans.

    Each item of ``classifications`` is read as a mapping with the keys
    ``'entity'`` (a tag such as ``'B-Observation'`` / ``'I-Observation'``),
    ``'start'`` and ``'end'`` (offsets into the analysed text).

    A span is opened by every ``B-<label>`` token and extended over the
    directly following ``I-<label>`` tokens that carry the *same* label.
    Tokens with any other tag (including ``I-`` tokens that do not follow a
    matching ``B-``) are ignored.  Spans of the same label that touch each
    other (one ends exactly where the next begins) are then fused into one.

    Returns:
        A list of ``(label, start, end)`` tuples ordered by start offset.
    """
    total = len(classifications)
    spans = []
    for index, token in enumerate(classifications):
        tag = token['entity']
        # Only a well-formed "B-<label>" tag may open a span.
        if not isinstance(tag, str) or not tag.startswith('B-'):
            continue
        # Split once so that labels which themselves contain "-" survive.
        label = tag.split('-', 1)[1]
        continuation = 'I-' + label
        end = token['end']
        nxt = index + 1
        # Extend only over I- tokens of the same label; a different label
        # must not be swallowed by this span.
        while nxt < total and classifications[nxt]['entity'] == continuation:
            end = classifications[nxt]['end']
            nxt += 1
        spans.append((label, token['start'], end))

    # Fuse touching spans of the same label in a single ordered pass.
    merged = []
    last_for_label = {}  # label -> index in ``merged`` of its latest span
    for label, start, end in sorted(spans, key=lambda s: (s[1], s[2])):
        pos = last_for_label.get(label)
        if pos is not None and merged[pos][2] == start:
            merged[pos] = (label, merged[pos][1], max(merged[pos][2], end))
        else:
            last_for_label[label] = len(merged)
            merged.append((label, start, end))
    return merged
61
+
62
+
63
def set_entities(sentence, entities):
    """Run the spaCy pipeline on ``sentence`` and attach ``entities`` to it.

    Args:
        sentence: Text to analyse with the module-level ``nlp`` pipeline.
        entities: Iterable of ``(label, start, end)`` tuples whose offsets
            are character positions in ``sentence``.

    Returns:
        The spaCy ``Doc`` whose ``ents`` are the spans built from
        ``entities``.
    """
    doc = nlp(sentence)
    spans = []
    for label, start, end in entities:
        # Model offsets need not coincide with spaCy token boundaries; in the
        # default strict mode ``char_span`` returns None for those, and one
        # None used to make the whole ``doc.ents`` assignment fail silently.
        # "expand" snaps the span outwards to the enclosing tokens instead.
        span = doc.char_span(start, end, label=label, alignment_mode="expand")
        if span is None or len(span) == 0:
            continue
        spans.append(span)
    # ``doc.ents`` rejects overlapping spans; keep the longest of any overlap.
    doc.ents = spacy.util.filter_spans(spans)
    return doc
73
+
74
+
75
def apply_ner(input_text_message: str):
    """Classify ``input_text_message`` remotely and render it as HTML.

    The text is posted to the endpoint configured through the
    ``endpoint_url`` / ``auth_endpoint_token`` secrets, the token
    classifications are grouped by ``postprocess``, attached to a spaCy doc
    by ``set_entities`` and rendered with displaCy using ``options``.

    Args:
        input_text_message: Text entered by the user.

    Returns:
        The displaCy entity markup as an HTML string; an empty string for
        blank input.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx HTTP status.
        ValueError: If the endpoint answers with something other than a
            list of token classifications.
    """
    # Nothing to classify - avoid a pointless round trip to the endpoint.
    if not input_text_message or not input_text_message.strip():
        return ""

    auth_endpoint_token = st.secrets["auth_endpoint_token"]
    endpoint_url = st.secrets["endpoint_url"]

    headers = {
        'Authorization': auth_endpoint_token,
        'Content-Type': 'application/json',
    }
    json_data = {
        'inputs': input_text_message,
    }

    # Without a timeout a stalled endpoint would block this worker forever.
    response = requests.post(endpoint_url, headers=headers, json=json_data, timeout=30)
    # Surface HTTP failures here rather than as an obscure error further down.
    response.raise_for_status()

    classifications = response.json()
    if not isinstance(classifications, list):
        # Error payloads arrive as a JSON object instead of a token list.
        raise ValueError(f"Unexpected response from NER endpoint: {classifications!r}")

    entities = postprocess(classifications)
    doc = set_entities(input_text_message, entities)
    return displacy.render(doc, style="ent", options=options)
95
+
96
+
97
+ examples = ['Things are complicated because we still live together but we have separate lives',
98
+ 'My dad is a monster and took his anger out on my mom by verbally abusing her and when she left he eventually moved on to my brother',
99
+ 'A two months ago, she was chatting with some random guy',
100
+ 'Not I have a horrid relationship with my brother we’ve never gotten along and probably never will',
101
+ 'I was outside trying to leave and he caught me to explain why Im so rude',
102
+ ]
103
+
104
+ iface = gr.Interface(fn=apply_ner, inputs=gr.inputs.Textbox(lines=5, placeholder="Enter your text here",
105
+ label='Check your text for compliance with the NVC rules'),
106
+ outputs="html", examples=examples)
107
+ iface.launch()
ner_v0_model.ipynb ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "pycharm": {
9
+ "name": "#%%\n"
10
+ }
11
+ },
12
+ "outputs": [],
13
+ "source": [
14
+ "import gradio as gr\n",
15
+ "import os\n",
16
+ "\n",
17
+ "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n",
18
+ "import requests\n",
19
+ "\n",
20
+ "from spacy import displacy"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "outputs": [],
27
+ "source": [
28
+ "\n",
29
+ "def compute_ner(input_text_message):\n",
30
+ " endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'\n",
31
+ "\n",
32
+ " headers = {\n",
33
+ " 'Authorization': 'Bearer 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',\n",
34
+ " 'Content-Type': 'application/json',\n",
35
+ " }\n",
36
+ "\n",
37
+ " json_data = {\n",
38
+ " 'inputs': input_text_message,\n",
39
+ " }\n",
40
+ "\n",
41
+ " response = requests.post(endpoint_url, headers=headers, json=json_data)\n",
42
+ "\n",
43
+ " tokens = response.json()\n",
44
+ "\n",
45
+ " entities = []\n",
46
+ "\n",
47
+ " for token in tokens:\n",
48
+ " label = token[\"entity\"]\n",
49
+ "\n",
50
+ " if label == \"I-Observation\" or label == \"B-Observation\":\n",
51
+ " label = \"Observation\"\n",
52
+ " token[\"label\"] = label\n",
53
+ " entities.append(token)\n",
54
+ "\n",
55
+ " if label == \"I-Evaluation\" or label == \"B-Evaluation\":\n",
56
+ " label = \"Evaluation\"\n",
57
+ " token[\"label\"] = label\n",
58
+ " entities.append(token)\n",
59
+ "\n",
60
+ " params = [{\"text\": input_text_message,\n",
61
+ " \"ents\": entities,\n",
62
+ " \"title\": None}]\n",
63
+ "\n",
64
+ " return displacy.render(params, style=\"ent\", manual=True, options={\n",
65
+ " \"colors\": {\n",
66
+ " \"Observation\": \"#9bddff\",\n",
67
+ " \"Evaluation\": \"#f08080\",\n",
68
+ " },\n",
69
+ " })\n",
70
+ "\n",
71
+ "\n",
72
+ "examples = ['You are dick',\n",
73
+ " 'My dad is an asshole and took his anger out on my mom by verbally abusing her','He eventually moved on to my brother']\n",
74
+ "\n",
75
+ "iface = gr.Interface(fn=compute_ner, inputs=gr.inputs.Textbox(lines=5, placeholder=\"Enter your text here\",label='Check your text on NVC rules'),\n",
76
+ " outputs=\"html\", examples=examples,)\n",
77
+ "iface.launch(debug=True)"
78
+ ],
79
+ "metadata": {
80
+ "collapsed": false,
81
+ "pycharm": {
82
+ "name": "#%%\n"
83
+ }
84
+ }
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "outputs": [],
90
+ "source": [
91
+ "endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'\n",
92
+ "\n",
93
+ "headers = {\n",
94
+ " 'Authorization': 'Bearer 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',\n",
95
+ " 'Content-Type': 'application/json',\n",
96
+ "}\n",
97
+ "\n",
98
+ "json_data = {\n",
99
+ " 'inputs': 'you are dick',\n",
100
+ "}\n",
101
+ "\n",
102
+ "response = requests.post(endpoint_url, headers=headers, json=json_data)\n",
103
+ "\n",
104
+ "tokens = response.json()"
105
+ ],
106
+ "metadata": {
107
+ "collapsed": false,
108
+ "pycharm": {
109
+ "name": "#%%\n"
110
+ }
111
+ }
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": null,
116
+ "outputs": [],
117
+ "source": [
118
+ "tokens"
119
+ ],
120
+ "metadata": {
121
+ "collapsed": false,
122
+ "pycharm": {
123
+ "name": "#%%\n"
124
+ }
125
+ }
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "outputs": [],
131
+ "source": [
132
+ "import spacy\n",
133
+ "from spacy import displacy"
134
+ ],
135
+ "metadata": {
136
+ "collapsed": false,
137
+ "pycharm": {
138
+ "name": "#%%\n"
139
+ }
140
+ }
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": null,
145
+ "outputs": [],
146
+ "source": [
147
+ "!python -m spacy download en_core_web_sm"
148
+ ],
149
+ "metadata": {
150
+ "collapsed": false,
151
+ "pycharm": {
152
+ "name": "#%%\n"
153
+ }
154
+ }
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "outputs": [],
160
+ "source": [
161
+ "\n",
162
+ "text = \"When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.\"\n",
163
+ "\n",
164
+ "nlp = spacy.load(\"en_core_web_sm\")\n",
165
+ "doc = nlp(text)\n",
166
+ "displacy.serve(doc, style=\"ent\")"
167
+ ],
168
+ "metadata": {
169
+ "collapsed": false,
170
+ "pycharm": {
171
+ "name": "#%%\n"
172
+ }
173
+ }
174
+ }
175
+ ],
176
+ "metadata": {
177
+ "kernelspec": {
178
+ "name": "torch",
179
+ "language": "python",
180
+ "display_name": "torch"
181
+ },
182
+ "language_info": {
183
+ "codemirror_mode": {
184
+ "name": "ipython",
185
+ "version": 2
186
+ },
187
+ "file_extension": ".py",
188
+ "mimetype": "text/x-python",
189
+ "name": "python",
190
+ "nbconvert_exporter": "python",
191
+ "pygments_lexer": "ipython2",
192
+ "version": "2.7.6"
193
+ }
194
+ },
195
+ "nbformat": 4,
196
+ "nbformat_minor": 0
197
+ }
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==3.11.0
2
+ requests==2.28.1
3
+ spacy==3.4.3