Spaces:

empy-ai
/

Token-classification-base

Runtime error

App Files Files Community

David committed on Dec 9, 2022

Commit

73429da

1 Parent(s): 612fb43

initial

Browse files

Files changed (4) hide show

README.md +5 -5
app.py +107 -0
ner_v0_model.ipynb +197 -0
requirements.txt +3 -0

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Token Classification Base
-emoji: 💻
-colorFrom: indigo
-colorTo: indigo
 sdk: gradio
-sdk_version: 3.12.0
 app_file: app.py
 pinned: false
 ---

 ---
+title: Token Classification
+emoji: 🦀
+colorFrom: blue
+colorTo: red
 sdk: gradio
+sdk_version: 3.11.0
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import gradio as gr
+import os
+import requests
+from spacy import displacy
+import streamlit as st
+os.system("python -m spacy download en_core_web_md")
+import spacy
+# entities group method
+# https://huggingface.co/spaces/crabz/sk-ner/blob/main/app.py
+# Rendering options handed to displacy.render() in apply_ner():
+#   "ents"   - the entity labels listed for the visualizer
+#   "colors" - label -> hex background colour used for each label
+options = {"ents": ["Observation",
+                    "Evaluation"],
+           "colors": {
+               "Observation": "#9bddff",
+               "Evaluation": "#f08080",
+           }
+           }
+# Pipeline loaded once at import time; set_entities() calls it to turn the
+# input sentence into a Doc that the predicted spans are attached to.
+nlp = spacy.load("en_core_web_md")
+def postprocess(classifications):
+    entities = []
+    for i in range(len(classifications)):
+        if classifications[i]['entity'] != 0:
+            if classifications[i]['entity'][0] == 'B':
+                j = i + 1
+                while j < len(classifications) and classifications[j]['entity'][0] == 'I':
+                    j += 1
+                entities.append((classifications[i]['entity'].split('-')[1], classifications[i]['start'],
+                                 classifications[j - 1]['end']))
+    while True:
+        merged = False
+        to_remove = []
+        merged_entities = []
+        for i in range(len(entities)):
+            for j in range(i + 1, len(entities)):
+                if entities[i] != entities[j] and entities[i][0] == entities[j][0] and \
+                        (entities[i][2] == entities[j][1] or entities[i][1] == entities[j][2]):
+                    to_remove.append(entities[i])
+                    to_remove.append(entities[j])
+                    new_start = min(entities[i][1], entities[j][1])
+                    new_end = max(entities[i][2], entities[j][2])
+                    merged_entities.append((entities[i][0], new_start, new_end))
+                    merged = True
+                    break
+            if merged:
+                break
+        for ent in to_remove:
+            entities.remove(ent)
+        entities += merged_entities
+        if not merged:
+            break
+    return entities
+def set_entities(sentence, entities):
+    doc = nlp(sentence)
+    ents = []
+    for label, start, end in entities:
+        ents.append(doc.char_span(start, end, label))
+    try:
+        doc.ents = ents
+    except TypeError:
+        pass
+    return doc
+def apply_ner(input_text_message: str):
+    auth_endpoint_token = st.secrets["auth_endpoint_token"]
+    endpoint_url = st.secrets["endpoint_url"]
+    headers = {
+        'Authorization': auth_endpoint_token,
+        'Content-Type': 'application/json',
+    }
+    json_data = {
+        'inputs': input_text_message,
+    }
+    response = requests.post(endpoint_url, headers=headers, json=json_data)
+    classifications = response.json()
+    entities = postprocess(classifications)
+    doc = set_entities(input_text_message, entities)
+    displacy_html = displacy.render(doc, style="ent", options=options)
+    return displacy_html
+examples = ['Things are complicated because we still live together but we have separate lives',
+            'My dad is a monster and took his anger out on my mom by verbally abusing her and when she left he eventually moved on to my brother',
+            'A two months ago, she was chatting with some random guy',
+            'Not I have a horrid relationship with my brother we’ve never gotten along and probably never will',
+            'I was outside trying to leave and he caught me to explain why Im so rude',
+            ]
+iface = gr.Interface(fn=apply_ner, inputs=gr.inputs.Textbox(lines=5, placeholder="Enter your text here",
+                                                            label='Check your text for compliance with the NVC rules'),
+                     outputs="html", examples=examples)
+iface.launch()

ner_v0_model.ipynb ADDED Viewed

	@@ -0,0 +1,197 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import gradio as gr\n",
+    "import os\n",
+    "\n",
+    "os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'\n",
+    "import requests\n",
+    "\n",
+    "from spacy import displacy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "\n",
+    "def compute_ner(input_text_message):\n",
+    "    endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'\n",
+    "\n",
+    "    headers = {\n",
+    "        'Authorization': 'Bearer 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',\n",
+    "        'Content-Type': 'application/json',\n",
+    "    }\n",
+    "\n",
+    "    json_data = {\n",
+    "        'inputs': input_text_message,\n",
+    "    }\n",
+    "\n",
+    "    response = requests.post(endpoint_url, headers=headers, json=json_data)\n",
+    "\n",
+    "    tokens = response.json()\n",
+    "\n",
+    "    entities = []\n",
+    "\n",
+    "    for token in tokens:\n",
+    "        label = token[\"entity\"]\n",
+    "\n",
+    "        if label == \"I-Observation\" or label == \"B-Observation\":\n",
+    "            label = \"Observation\"\n",
+    "            token[\"label\"] = label\n",
+    "            entities.append(token)\n",
+    "\n",
+    "        if label == \"I-Evaluation\" or label == \"B-Evaluation\":\n",
+    "            label = \"Evaluation\"\n",
+    "            token[\"label\"] = label\n",
+    "            entities.append(token)\n",
+    "\n",
+    "    params = [{\"text\": input_text_message,\n",
+    "               \"ents\": entities,\n",
+    "               \"title\": None}]\n",
+    "\n",
+    "    return displacy.render(params, style=\"ent\", manual=True, options={\n",
+    "        \"colors\": {\n",
+    "            \"Observation\": \"#9bddff\",\n",
+    "            \"Evaluation\": \"#f08080\",\n",
+    "        },\n",
+    "    })\n",
+    "\n",
+    "\n",
+    "examples = ['You are dick',\n",
+    "            'My dad is an asshole and took his anger out on my mom by verbally abusing her','He eventually moved on to my brother']\n",
+    "\n",
+    "iface = gr.Interface(fn=compute_ner, inputs=gr.inputs.Textbox(lines=5, placeholder=\"Enter your text here\",label='Check your text on NVC rules'),\n",
+    "                     outputs=\"html\", examples=examples,)\n",
+    "iface.launch(debug=True)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "endpoint_url = 'https://on1m82uknekghqeh.us-east-1.aws.endpoints.huggingface.cloud'\n",
+    "\n",
+    "headers = {\n",
+    "    'Authorization': 'Bearer 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',\n",
+    "    'Content-Type': 'application/json',\n",
+    "}\n",
+    "\n",
+    "json_data = {\n",
+    "    'inputs': 'you are dick',\n",
+    "}\n",
+    "\n",
+    "response = requests.post(endpoint_url, headers=headers, json=json_data)\n",
+    "\n",
+    "tokens = response.json()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "tokens"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "import spacy\n",
+    "from spacy import displacy"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "!python -m spacy download en_core_web_sm"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "\n",
+    "text = \"When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.\"\n",
+    "\n",
+    "nlp = spacy.load(\"en_core_web_sm\")\n",
+    "doc = nlp(text)\n",
+    "displacy.serve(doc, style=\"ent\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "torch",
+   "language": "python",
+   "display_name": "torch"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==3.11.0
+requests==2.28.1
+spacy==3.4.3