Update spaCy pipeline

Browse files

Files changed (9) hide show

.gitattributes +1 -0
README.md +12 -80
config.cfg +25 -36
meta.json +3 -27
ner/model +0 -0
ner/moves +1 -1
tok2vec/model +2 -2
vocab/strings.json +0 -0
xx_eb_ner-any-py3-none-any.whl +2 -2

.gitattributes CHANGED Viewed

@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tok2vec/model filter=lfs diff=lfs merge=lfs -text
 xx_eb_ner-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tok2vec/model filter=lfs diff=lfs merge=lfs -text
 xx_eb_ner-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
+ner/model filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -4,51 +4,18 @@ tags:
 - token-classification
 language:
 - multilingual
-widget:
-- text: I'm looking for courses in machine learning.
-  example_title: Course example
-- text: Can you send me some sales jobs?
-  example_title: Job example
-- text: I'm from Berlin, Germany. Can you recommend courses to me?
-  example_title: Location example
-- text: >-
-    Next month I will be moving to London. I'm looking for software development
-    jobs there. And can you recommend any language courses so I can meet new
-    people?
-  example_title: Mixed example
-model-index:
-- name: xx_eb_ner
-  results:
-  - task:
-      name: NER
-      type: token-classification
-    metrics:
-    - name: NER Precision
-      type: precision
-      value: 0.99471974
-    - name: NER Recall
-      type: recall
-      value: 0.9937070263
-    - name: NER F Score
-      type: f_score
-      value: 0.9942131253
-license: mit
-library_name: spacy
 ---
-| Feature              | Description                                           |
-| ---                  | ---                                                   |
-| **Name**             | `xx_eb_ner`                                           |
-| **Version**          | `0.2.0`                                               |
-| **spaCy**            | `>=3.6.1,<3.7.0`                                      |
-| **Default Pipeline** | `tok2vec`, `ner`                                      |
-| **Components**       | `tok2vec`, `ner`                                      |
-| **Vectors**          | 0 keys, 0 unique vectors (0 dimensions)               |
-| **Sources**          | n/a                                                   |
-| **License**          | mit                                                   |
-| **Author**           | [philipp-zettl](https://huggingface.co/philipp-zettl) |
 ### Label Scheme
@@ -60,39 +27,4 @@ library_name: spacy
 | --- | --- |
 | **`ner`** | `COURSE_NAME`, `JOB_TITLE`, `LOCATION` |
-</details>
-### Accuracy
-| Type           | Score    |
-| ---            | ---      |
-| `ENTS_F`       | 99.54    |
-| `ENTS_P`       | 99.56    |
-| `ENTS_R`       | 99.52    |
-| `TOK2VEC_LOSS` | 35345.94 |
-| `NER_LOSS`     | 32265.61 |
-### Usage
-Install the model via pip:
-```shell
-pip install https://huggingface.co/philipp-zettl/xx_eb_ner/resolve/main/xx_eb_ner-any-py3-none-any.whl
-```
-For specific versions, please use the commits provided in the [source repository](https://huggingface.co/philipp-zettl/xx_eb_ner).
-Example: version v0.2.0
-```shell
-pip install https://huggingface.co/philipp-zettl/xx_eb_ner/resolve/v0.2.0/xx_eb_ner-any-py3-none-any.whl
-```
-After installing the model with its dependencies, you can use it like any other spaCy model:
-```python
-# Using spacy.load().
-import spacy
-nlp = spacy.load("xx_eb_ner")
-# Importing as module.
-import xx_eb_ner
-nlp = xx_eb_ner.load()
-```

 - token-classification
 language:
 - multilingual
 ---
+| Feature | Description |
+| --- | --- |
+| **Name** | `xx_eb_ner` |
+| **Version** | `0.2.1` |
+| **spaCy** | `>=3.7.4,<3.8.0` |
+| **Default Pipeline** | `tok2vec`, `ner` |
+| **Components** | `tok2vec`, `ner` |
+| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
+| **Sources** | n/a |
+| **License** | n/a |
+| **Author** | n/a |
 ### Label Scheme
 | --- | --- |
 | **`ner`** | `COURSE_NAME`, `JOB_TITLE`, `LOCATION` |
+</details>

config.cfg CHANGED Viewed

@@ -5,18 +5,19 @@ vectors = null
 init_tok2vec = null
 [system]
-gpu_allocator = null
 seed = 0
 [nlp]
 lang = "xx"
 pipeline = ["tok2vec","ner"]
-tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 disabled = []
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
 batch_size = 1000
 [components]
@@ -37,51 +38,47 @@ use_upper = true
 nO = null
 [components.ner.model.tok2vec]
-@architectures = "spacy.Tok2VecListener.v1"
-width = ${components.tok2vec.model.encode.width}
-upstream = "*"
 [components.tok2vec]
 factory = "tok2vec"
 [components.tok2vec.model]
-@architectures = "spacy.Tok2Vec.v2"
-[components.tok2vec.model.embed]
-@architectures = "spacy.MultiHashEmbed.v2"
-width = ${components.tok2vec.model.encode.width}
-attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
-rows = [5000,1000,2500,2500]
-include_static_vectors = true
-[components.tok2vec.model.encode]
-@architectures = "spacy.MaxoutWindowEncoder.v2"
-width = 256
-depth = 8
 window_size = 1
 maxout_pieces = 3
 [corpora]
 [corpora.dev]
 @readers = "spacy.Corpus.v1"
 path = ${paths.dev}
-max_length = 0
 gold_preproc = false
 limit = 0
 augmenter = null
 [corpora.train]
 @readers = "spacy.Corpus.v1"
 path = ${paths.train}
-max_length = 0
 gold_preproc = false
 limit = 0
 augmenter = null
 [training]
-dev_corpus = "corpora.dev"
-train_corpus = "corpora.train"
 seed = ${system.seed}
 gpu_allocator = ${system.gpu_allocator}
 dropout = 0.1
@@ -92,28 +89,25 @@ max_steps = 20000
 eval_frequency = 200
 frozen_components = []
 annotating_components = []
 before_to_disk = null
 before_update = null
 [training.batcher]
 @batchers = "spacy.batch_by_words.v1"
 discard_oversize = false
 tolerance = 0.2
 get_length = null
-[training.batcher.size]
-@schedules = "compounding.v1"
-start = 100
-stop = 1000
-compound = 1.001
-t = 0.0
 [training.logger]
 @loggers = "spacy.ConsoleLogger.v1"
 progress_bar = false
 [training.optimizer]
 @optimizers = "Adam.v1"
 beta1 = 0.9
 beta2 = 0.999
 L2_is_weight_decay = true
@@ -121,7 +115,6 @@ L2 = 0.01
 grad_clip = 1.0
 use_averages = false
 eps = 0.00000001
-learn_rate = 0.001
 [training.score_weights]
 ents_f = 1.0
@@ -132,17 +125,13 @@ ents_per_type = null
 [pretraining]
 [initialize]
-vectors = ${paths.vectors}
 init_tok2vec = ${paths.init_tok2vec}
 vocab_data = null
 before_init = null
 after_init = null
 [initialize.components]
-[initialize.lookups]
-@misc = "spacy.LookupsDataLoader.v1"
-lang = ${nlp.lang}
-tables = []
 [initialize.tokenizer]

 init_tok2vec = null
 [system]
+gpu_allocator = "pytorch"
 seed = 0
 [nlp]
 lang = "xx"
 pipeline = ["tok2vec","ner"]
 disabled = []
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
 batch_size = 1000
+tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
+vectors = {"@vectors":"spacy.Vectors.v1"}
 [components]
 nO = null
 [components.ner.model.tok2vec]
+@architectures = "spacy.HashEmbedCNN.v2"
+pretrained_vectors = null
+width = 96
+depth = 4
+embed_size = 2000
+window_size = 1
+maxout_pieces = 3
+subword_features = true
 [components.tok2vec]
 factory = "tok2vec"
 [components.tok2vec.model]
+@architectures = "spacy.HashEmbedCNN.v2"
+pretrained_vectors = null
+width = 96
+depth = 4
+embed_size = 2000
 window_size = 1
 maxout_pieces = 3
+subword_features = true
 [corpora]
 [corpora.dev]
 @readers = "spacy.Corpus.v1"
 path = ${paths.dev}
 gold_preproc = false
+max_length = 0
 limit = 0
 augmenter = null
 [corpora.train]
 @readers = "spacy.Corpus.v1"
 path = ${paths.train}
 gold_preproc = false
+max_length = 0
 limit = 0
 augmenter = null
 [training]
 seed = ${system.seed}
 gpu_allocator = ${system.gpu_allocator}
 dropout = 0.1
 eval_frequency = 200
 frozen_components = []
 annotating_components = []
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
 before_to_disk = null
 before_update = null
 [training.batcher]
 @batchers = "spacy.batch_by_words.v1"
+size = 1000
 discard_oversize = false
 tolerance = 0.2
 get_length = null
 [training.logger]
 @loggers = "spacy.ConsoleLogger.v1"
 progress_bar = false
 [training.optimizer]
 @optimizers = "Adam.v1"
+learn_rate = 0.001
 beta1 = 0.9
 beta2 = 0.999
 L2_is_weight_decay = true
 grad_clip = 1.0
 use_averages = false
 eps = 0.00000001
 [training.score_weights]
 ents_f = 1.0
 [pretraining]
 [initialize]
+vectors = null
 init_tok2vec = ${paths.init_tok2vec}
 vocab_data = null
+lookups = null
 before_init = null
 after_init = null
 [initialize.components]
 [initialize.tokenizer]

meta.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "lang":"xx",
   "name":"eb_ner",
-  "version":"0.2.0",
   "description":"",
   "author":"",
   "email":"",
   "url":"",
   "license":"",
-  "spacy_version":">=3.6.1,<3.7.0",
-  "spacy_git_version":"458bc5f45",
   "vectors":{
     "width":0,
     "vectors":0,
@@ -36,30 +36,6 @@
   "disabled":[
   ],
-  "performance":{
-    "ents_f":0.9954006007,
-    "ents_p":0.9956087463,
-    "ents_r":0.9951925422,
-    "ents_per_type":{
-      "COURSE_NAME":{
-        "p":0.997066476,
-        "r":0.9973899387,
-        "f":0.9972281811
-      },
-      "LOCATION":{
-        "p":0.9958545643,
-        "r":0.9980093474,
-        "f":0.9969307915
-      },
-      "JOB_TITLE":{
-        "p":0.9908607864,
-        "r":0.9827357238,
-        "f":0.9867815301
-      }
-    },
-    "tok2vec_loss":353.4594166442,
-    "ner_loss":322.656127662
-  },
   "requirements":[
   ]

 {
   "lang":"xx",
   "name":"eb_ner",
+  "version":"0.2.1",
   "description":"",
   "author":"",
   "email":"",
   "url":"",
   "license":"",
+  "spacy_version":">=3.7.4,<3.8.0",
+  "spacy_git_version":"bff8725f4",
   "vectors":{
     "width":0,
     "vectors":0,
   "disabled":[
   ],
   "requirements":[
   ]

ner/model CHANGED Viewed

Binary files a/ner/model and b/ner/model differ

ner/moves CHANGED Viewed

	@@ -1 +1 @@
1	- ��moves�{"0":{},"1":{"COURSE_NAME":~~490453~~,"JOB_TITLE":~~219750~~,"LOCATION":~~200713~~},"2":{"COURSE_NAME":~~490453~~,"JOB_TITLE":~~219750~~,"LOCATION":~~200713~~},"3":{"COURSE_NAME":~~490453~~,"JOB_TITLE":~~219750~~,"LOCATION":~~200713~~},"4":{"COURSE_NAME":~~490453~~,"JOB_TITLE":~~219750~~,"LOCATION":~~200713~~,"":1},"5":{"":1}}�cfg��neg_key�


1	+ ��moves�${"0":{},"1":{"COURSE_NAME":1955153,"JOB_TITLE":1206960,"LOCATION":1154534},"2":{"COURSE_NAME":1955153,"JOB_TITLE":1206960,"LOCATION":1154534},"3":{"COURSE_NAME":1955153,"JOB_TITLE":1206960,"LOCATION":1154534},"4":{"COURSE_NAME":1955153,"JOB_TITLE":1206960,"LOCATION":1154534,"":1},"5":{"":1}}�cfg��neg_key�

tok2vec/model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c120186477af10d01381b8c0de2eafbb21ceac1e75e1ce3fafcb07733588b02
-size 34126801

 version https://git-lfs.github.com/spec/v1
+oid sha256:c887bb91d8fb38fbfdf3dbf994d61cfb54b7ce900b89bec0e6c92b8f023f7ee7
+size 3705091

vocab/strings.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

xx_eb_ner-any-py3-none-any.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83e0f69bc0c92307bd6dcb402a148b47f2e94035351ea480fc878e0c1a351d45
-size 31846488

 version https://git-lfs.github.com/spec/v1
+oid sha256:38309fa0b6607c4d8b130659210e2cd400241ce588039e6fede3efbbd4ab1912
+size 7796938