Jacobo committed on
Commit
c561745
·
verified ·
1 Parent(s): cafdc3c

Update spaCy pipeline

Browse files
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "corpus/ud/proiel/train"
3
- dev = "corpus/ud/proiel/dev"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "grc"
13
- pipeline = ["tok2vec","morphologizer","tagger","parser","lemmatizer","attribute_ruler","ner"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
@@ -85,7 +85,7 @@ nO = null
85
  [components.ner.model.tok2vec]
86
  @architectures = "spacy.Tok2VecListener.v1"
87
  width = ${components.tok2vec.model.encode.width}
88
- upstream = "*"
89
 
90
  [components.parser]
91
  factory = "parser"
@@ -174,7 +174,7 @@ accumulate_gradient = 1
174
  patience = 5000
175
  max_epochs = 0
176
  max_steps = 20000
177
- eval_frequency = 200
178
  frozen_components = ["lemmatizer"]
179
  annotating_components = []
180
  before_to_disk = null
 
1
  [paths]
2
+ train = "corpus/parser/proiel/train"
3
+ dev = "corpus/parser/proiel/dev"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
10
 
11
  [nlp]
12
  lang = "grc"
13
+ pipeline = ["tok2vec","morphologizer","tagger","parser","lemmatizer","ner","attribute_ruler"]
14
  batch_size = 128
15
  disabled = []
16
  before_creation = null
 
85
  [components.ner.model.tok2vec]
86
  @architectures = "spacy.Tok2VecListener.v1"
87
  width = ${components.tok2vec.model.encode.width}
88
+ upstream = "tok2vec"
89
 
90
  [components.parser]
91
  factory = "parser"
 
174
  patience = 5000
175
  max_epochs = 0
176
  max_steps = 20000
177
+ eval_frequency = 1000
178
  frozen_components = ["lemmatizer"]
179
  annotating_components = []
180
  before_to_disk = null
grc_proiel_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f238e6e7122a16bfaf5fa53f9e0582cdf637d6a540137ad6d67b7bb2fa0a987
3
- size 60241432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ead29b9c566e482eb55e6a621c37385ffe7dbd63616b042289ee73ba6263792
3
+ size 65450746
lemmatizer/cfg CHANGED
The diff for this file is too large to render. See raw diff
 
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97e1dbe7cc8521056c72501adfc36de5fcb26ee7a98e1ed9117931797e6e88e8
3
- size 24263260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:becf7d37bd570f1a89a157da3184d2710dccc79804ec6f0ad3cae090aafe5c2a
3
+ size 29215702
lemmatizer/trees CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:087b25d2e16192e3addbfbe3d55ae0b878ee6893648f136313c0d8b21505df7c
3
- size 5318272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86606a185ea3cbd4c6f071029121d17265dfe9d5a468c6dbeb071a5808cf49f9
3
+ size 6516433
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"grc",
3
  "name":"proiel_sm",
4
- "version":"3.7",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
- "spacy_version":">=3.7.2,<3.8.0",
11
- "spacy_git_version":"a89eae928",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -45,6 +45,7 @@
45
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
46
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
47
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
 
48
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
49
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
50
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
@@ -727,6 +728,7 @@
727
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
728
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
729
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
 
730
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
731
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
732
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
@@ -1073,7 +1075,8 @@
1073
  "Px",
1074
  "R-",
1075
  "S-",
1076
- "V-"
 
1077
  ],
1078
  "parser":[
1079
  "ROOT",
@@ -1105,18 +1108,23 @@
1105
  "obl:agent",
1106
  "orphan",
1107
  "parataxis",
 
1108
  "vocative",
1109
  "xcomp"
1110
- ],
1111
- "attribute_ruler":[
1112
-
1113
  ],
1114
  "ner":[
 
1115
  "GOD",
 
1116
  "LANGUAGE",
1117
  "LOC",
1118
  "NORP",
1119
- "PERSON"
 
 
 
 
 
1120
  ]
1121
  },
1122
  "pipeline":[
@@ -1125,8 +1133,8 @@
1125
  "tagger",
1126
  "parser",
1127
  "lemmatizer",
1128
- "attribute_ruler",
1129
- "ner"
1130
  ],
1131
  "components":[
1132
  "tok2vec",
@@ -1134,40 +1142,40 @@
1134
  "tagger",
1135
  "parser",
1136
  "lemmatizer",
1137
- "attribute_ruler",
1138
- "ner"
1139
  ],
1140
  "disabled":[
1141
 
1142
  ],
1143
  "performance":{
1144
- "pos_acc":0.956782889,
1145
- "morph_acc":0.8730588925,
1146
  "morph_per_feat":{
1147
  "Case":{
1148
- "p":0.9507095793,
1149
- "r":0.9468702675,
1150
- "f":0.9487860395
1151
  },
1152
  "Gender":{
1153
- "p":0.9064610679,
1154
- "r":0.9076177109,
1155
- "f":0.9070390207
1156
  },
1157
  "Number":{
1158
- "p":0.9796532752,
1159
- "r":0.97580395,
1160
- "f":0.9777248239
1161
  },
1162
  "Person":{
1163
- "p":0.9537721893,
1164
- "r":0.9520118125,
1165
- "f":0.9528911879
1166
  },
1167
  "PronType":{
1168
- "p":0.9797186784,
1169
- "r":0.9724025974,
1170
- "f":0.9760469285
1171
  },
1172
  "Polarity":{
1173
  "p":1.0,
@@ -1175,204 +1183,204 @@
1175
  "f":0.9921671018
1176
  },
1177
  "Aspect":{
1178
- "p":0.9161712247,
1179
- "r":0.8745743473,
1180
- "f":0.8948896632
1181
  },
1182
  "Mood":{
1183
- "p":0.9310740953,
1184
- "r":0.9278763595,
1185
- "f":0.9294724771
1186
  },
1187
  "Tense":{
1188
- "p":0.88958117,
1189
- "r":0.8828581244,
1190
- "f":0.8862068966
1191
  },
1192
  "VerbForm":{
1193
- "p":0.962962963,
1194
- "r":0.9550291795,
1195
- "f":0.9589796622
1196
  },
1197
  "Voice":{
1198
- "p":0.9151955694,
1199
- "r":0.9082789419,
1200
- "f":0.9117241379
1201
  },
1202
  "Degree":{
1203
- "p":0.8646723647,
1204
- "r":0.8158602151,
1205
- "f":0.8395573997
1206
  },
1207
  "Definite":{
1208
- "p":0.9871106337,
1209
- "r":0.9929767693,
1210
- "f":0.9900350121
1211
  },
1212
  "Reflex":{
1213
  "p":1.0,
1214
- "r":1.0,
1215
- "f":1.0
1216
  },
1217
  "Poss":{
1218
  "p":1.0,
1219
- "r":0.7368421053,
1220
- "f":0.8484848485
1221
  }
1222
  },
1223
- "tag_acc":0.9596396132,
1224
- "dep_uas":0.7543949604,
1225
- "dep_las":0.6981394667,
1226
  "dep_las_per_type":{
1227
  "nsubj":{
1228
- "p":0.6775956284,
1229
- "r":0.6717226436,
1230
- "f":0.6746463547
1231
  },
1232
  "discourse":{
1233
- "p":0.7506811989,
1234
- "r":0.7568681319,
1235
- "f":0.7537619699
1236
  },
1237
  "mark":{
1238
- "p":0.7863636364,
1239
- "r":0.6975806452,
1240
- "f":0.7393162393
1241
  },
1242
  "advmod":{
1243
- "p":0.6810730253,
1244
  "r":0.6790490342,
1245
- "f":0.6800595238
1246
  },
1247
  "advcl":{
1248
- "p":0.6079234973,
1249
- "r":0.6079234973,
1250
- "f":0.6079234973
1251
  },
1252
  "xcomp":{
1253
- "p":0.4657039711,
1254
- "r":0.516,
1255
- "f":0.4895635674
1256
  },
1257
  "cop":{
1258
- "p":0.7373737374,
1259
- "r":0.6822429907,
1260
- "f":0.7087378641
1261
  },
1262
  "root":{
1263
- "p":0.6861747243,
1264
- "r":0.7939156035,
1265
- "f":0.7361237489
1266
  },
1267
  "det":{
1268
- "p":0.9080610022,
1269
- "r":0.9037294016,
1270
- "f":0.9058900239
1271
  },
1272
  "nmod":{
1273
- "p":0.6631762653,
1274
- "r":0.6690140845,
1275
- "f":0.6660823839
1276
  },
1277
  "obj":{
1278
- "p":0.688286544,
1279
- "r":0.738317757,
1280
- "f":0.7124248497
1281
  },
1282
  "case":{
1283
- "p":0.9454148472,
1284
- "r":0.9495614035,
1285
- "f":0.9474835886
1286
  },
1287
  "obl":{
1288
- "p":0.6643026005,
1289
- "r":0.6862026862,
1290
- "f":0.6750750751
1291
  },
1292
  "cc":{
1293
- "p":0.5436590437,
1294
- "r":0.5380658436,
1295
- "f":0.5408479835
1296
  },
1297
  "conj":{
1298
- "p":0.5023771791,
1299
- "r":0.4160104987,
1300
- "f":0.4551328069
1301
  },
1302
  "obl:agent":{
1303
- "p":0.6785714286,
1304
- "r":0.5135135135,
1305
- "f":0.5846153846
1306
  },
1307
  "ccomp":{
1308
- "p":0.4927536232,
1309
- "r":0.3383084577,
1310
- "f":0.401179941
1311
  },
1312
  "nsubj:pass":{
1313
- "p":0.5764705882,
1314
- "r":0.4579439252,
1315
- "f":0.5104166667
1316
  },
1317
  "amod":{
1318
- "p":0.6436781609,
1319
- "r":0.5258215962,
1320
- "f":0.5788113695
1321
  },
1322
  "acl":{
1323
- "p":0.4097222222,
1324
- "r":0.3575757576,
1325
- "f":0.3818770227
1326
  },
1327
  "iobj":{
1328
- "p":0.6770601336,
1329
- "r":0.7020785219,
1330
- "f":0.6893424036
1331
  },
1332
  "dep":{
1333
  "p":0.0,
1334
  "r":0.0,
1335
  "f":0.0
1336
  },
1337
- "parataxis":{
1338
- "p":0.0666666667,
1339
- "r":0.05,
1340
- "f":0.0571428571
1341
- },
1342
- "dislocated":{
1343
- "p":0.0,
1344
- "r":0.0,
1345
- "f":0.0
1346
- },
1347
- "appos":{
1348
- "p":0.3363636364,
1349
- "r":0.2624113475,
1350
- "f":0.2948207171
1351
- },
1352
  "nummod":{
1353
- "p":0.8545454545,
1354
  "r":0.6911764706,
1355
- "f":0.7642276423
1356
  },
1357
  "vocative":{
1358
- "p":0.6923076923,
1359
- "r":0.5217391304,
1360
- "f":0.5950413223
1361
  },
1362
  "orphan":{
1363
- "p":0.1714285714,
1364
- "r":0.1395348837,
1365
- "f":0.1538461538
1366
  },
1367
- "csubj:pass":{
1368
- "p":1.0,
1369
- "r":0.2,
1370
- "f":0.3333333333
 
 
 
 
 
1371
  },
1372
  "flat:name":{
1373
- "p":0.8125,
1374
  "r":0.5909090909,
1375
- "f":0.6842105263
 
 
 
 
 
 
 
 
 
 
1376
  },
1377
  "aux:pass":{
1378
  "p":0.0,
@@ -1380,9 +1388,9 @@
1380
  "f":0.0
1381
  },
1382
  "fixed":{
1383
- "p":0.6,
1384
- "r":0.6,
1385
- "f":0.6
1386
  },
1387
  "aux":{
1388
  "p":0.0,
@@ -1390,40 +1398,60 @@
1390
  "f":0.0
1391
  }
1392
  },
1393
- "sents_p":0.4235924933,
1394
- "sents_r":0.5244813278,
1395
- "sents_f":0.4686688914,
1396
- "lemma_acc":0.9657193085,
1397
- "ents_f":0.9087591241,
1398
- "ents_p":0.902173913,
1399
- "ents_r":0.9154411765,
1400
  "ents_per_type":{
1401
- "PERSON":{
1402
- "p":0.8951612903,
1403
- "r":0.9487179487,
1404
- "f":0.9211618257
1405
- },
1406
  "LOC":{
1407
- "p":0.9152542373,
1408
- "r":0.8709677419,
1409
- "f":0.8925619835
1410
  },
1411
  "NORP":{
1412
- "p":0.9101123596,
1413
- "r":0.9418604651,
1414
- "f":0.9257142857
1415
  },
1416
  "GOD":{
1417
- "p":0.75,
1418
- "r":0.4285714286,
1419
- "f":0.5454545455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1420
  }
1421
  },
1422
- "tok2vec_loss":37768.4657593779,
1423
- "morphologizer_loss":2533.5016539097,
1424
- "tagger_loss":755.3283261061,
1425
- "parser_loss":22103.8544994662,
1426
- "ner_loss":44.2601226536
1427
  },
1428
  "requirements":[
1429
 
 
1
  {
2
  "lang":"grc",
3
  "name":"proiel_sm",
4
+ "version":"3.7.5",
5
  "description":"",
6
  "author":"",
7
  "email":"",
8
  "url":"",
9
  "license":"",
10
+ "spacy_version":">=3.7.5,<3.8.0",
11
+ "spacy_git_version":"a6d0fc360",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
45
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN",
46
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
47
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp",
48
+ "POS=PUNCT",
49
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem",
50
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN",
51
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN",
 
728
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act",
729
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
730
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ",
731
+ "POS=AUX",
732
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass",
733
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN",
734
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN",
 
1075
  "Px",
1076
  "R-",
1077
  "S-",
1078
+ "V-",
1079
+ "Z"
1080
  ],
1081
  "parser":[
1082
  "ROOT",
 
1108
  "obl:agent",
1109
  "orphan",
1110
  "parataxis",
1111
+ "punct",
1112
  "vocative",
1113
  "xcomp"
 
 
 
1114
  ],
1115
  "ner":[
1116
+ "EVENT",
1117
  "GOD",
1118
+ "GPE",
1119
  "LANGUAGE",
1120
  "LOC",
1121
  "NORP",
1122
+ "ORG",
1123
+ "PERSON",
1124
+ "WORK"
1125
+ ],
1126
+ "attribute_ruler":[
1127
+
1128
  ]
1129
  },
1130
  "pipeline":[
 
1133
  "tagger",
1134
  "parser",
1135
  "lemmatizer",
1136
+ "ner",
1137
+ "attribute_ruler"
1138
  ],
1139
  "components":[
1140
  "tok2vec",
 
1142
  "tagger",
1143
  "parser",
1144
  "lemmatizer",
1145
+ "ner",
1146
+ "attribute_ruler"
1147
  ],
1148
  "disabled":[
1149
 
1150
  ],
1151
  "performance":{
1152
+ "pos_acc":0.9608070343,
1153
+ "morph_acc":0.8837161748,
1154
  "morph_per_feat":{
1155
  "Case":{
1156
+ "p":0.95349425,
1157
+ "r":0.9521706209,
1158
+ "f":0.9528319758
1159
  },
1160
  "Gender":{
1161
+ "p":0.9060394151,
1162
+ "r":0.9092765089,
1163
+ "f":0.9076550758
1164
  },
1165
  "Number":{
1166
+ "p":0.9798963731,
1167
+ "r":0.9777685865,
1168
+ "f":0.9788313234
1169
  },
1170
  "Person":{
1171
+ "p":0.9572966951,
1172
+ "r":0.9516426726,
1173
+ "f":0.9544613106
1174
  },
1175
  "PronType":{
1176
+ "p":0.9758800522,
1177
+ "r":0.9720779221,
1178
+ "f":0.9739752765
1179
  },
1180
  "Polarity":{
1181
  "p":1.0,
 
1183
  "f":0.9921671018
1184
  },
1185
  "Aspect":{
1186
+ "p":0.9041016753,
1187
+ "r":0.8881952327,
1188
+ "f":0.89607787
1189
  },
1190
  "Mood":{
1191
+ "p":0.9453080023,
1192
+ "r":0.9398969662,
1193
+ "f":0.9425947187
1194
  },
1195
  "Tense":{
1196
+ "p":0.8910720441,
1197
+ "r":0.8880109928,
1198
+ "f":0.8895388851
1199
  },
1200
  "VerbForm":{
1201
+ "p":0.964150293,
1202
+ "r":0.9601785101,
1203
+ "f":0.9621603027
1204
  },
1205
  "Voice":{
1206
+ "p":0.9227852465,
1207
+ "r":0.9196152525,
1208
+ "f":0.9211975224
1209
  },
1210
  "Degree":{
1211
+ "p":0.8845070423,
1212
+ "r":0.8440860215,
1213
+ "f":0.863823934
1214
  },
1215
  "Definite":{
1216
+ "p":0.9808408728,
1217
+ "r":0.9956780119,
1218
+ "f":0.9882037534
1219
  },
1220
  "Reflex":{
1221
  "p":1.0,
1222
+ "r":0.9761904762,
1223
+ "f":0.9879518072
1224
  },
1225
  "Poss":{
1226
  "p":1.0,
1227
+ "r":0.7894736842,
1228
+ "f":0.8823529412
1229
  }
1230
  },
1231
+ "tag_acc":0.9627155613,
1232
+ "dep_uas":0.7877436104,
1233
+ "dep_las":0.7298987166,
1234
  "dep_las_per_type":{
1235
  "nsubj":{
1236
+ "p":0.6856540084,
1237
+ "r":0.7042253521,
1238
+ "f":0.6948156066
1239
  },
1240
  "discourse":{
1241
+ "p":0.7496688742,
1242
+ "r":0.7774725275,
1243
+ "f":0.7633175995
1244
  },
1245
  "mark":{
1246
+ "p":0.7991452991,
1247
+ "r":0.7540322581,
1248
+ "f":0.77593361
1249
  },
1250
  "advmod":{
1251
+ "p":0.664244186,
1252
  "r":0.6790490342,
1253
+ "f":0.6715650257
1254
  },
1255
  "advcl":{
1256
+ "p":0.6475300401,
1257
+ "r":0.662568306,
1258
+ "f":0.6549628629
1259
  },
1260
  "xcomp":{
1261
+ "p":0.4798206278,
1262
+ "r":0.428,
1263
+ "f":0.4524312896
1264
  },
1265
  "cop":{
1266
+ "p":0.7378640777,
1267
+ "r":0.7102803738,
1268
+ "f":0.7238095238
1269
  },
1270
  "root":{
1271
+ "p":0.8932419197,
1272
+ "r":0.8949950932,
1273
+ "f":0.8941176471
1274
  },
1275
  "det":{
1276
+ "p":0.889314602,
1277
+ "r":0.9058976583,
1278
+ "f":0.8975295381
1279
  },
1280
  "nmod":{
1281
+ "p":0.7017892644,
1282
+ "r":0.6214788732,
1283
+ "f":0.6591970121
1284
  },
1285
  "obj":{
1286
+ "p":0.7190332326,
1287
+ "r":0.7414330218,
1288
+ "f":0.7300613497
1289
  },
1290
  "case":{
1291
+ "p":0.9506578947,
1292
+ "r":0.9506578947,
1293
+ "f":0.9506578947
1294
  },
1295
  "obl":{
1296
+ "p":0.678313253,
1297
+ "r":0.6874236874,
1298
+ "f":0.6828380837
1299
  },
1300
  "cc":{
1301
+ "p":0.6835051546,
1302
+ "r":0.6820987654,
1303
+ "f":0.6828012358
1304
  },
1305
  "conj":{
1306
+ "p":0.5748987854,
1307
+ "r":0.5590551181,
1308
+ "f":0.5668662675
1309
  },
1310
  "obl:agent":{
1311
+ "p":0.6,
1312
+ "r":0.3243243243,
1313
+ "f":0.4210526316
1314
  },
1315
  "ccomp":{
1316
+ "p":0.4830917874,
1317
+ "r":0.4975124378,
1318
+ "f":0.4901960784
1319
  },
1320
  "nsubj:pass":{
1321
+ "p":0.6,
1322
+ "r":0.5046728972,
1323
+ "f":0.5482233503
1324
  },
1325
  "amod":{
1326
+ "p":0.6315789474,
1327
+ "r":0.5633802817,
1328
+ "f":0.5955334988
1329
  },
1330
  "acl":{
1331
+ "p":0.4476190476,
1332
+ "r":0.2848484848,
1333
+ "f":0.3481481481
1334
  },
1335
  "iobj":{
1336
+ "p":0.6682134571,
1337
+ "r":0.6651270208,
1338
+ "f":0.6666666667
1339
  },
1340
  "dep":{
1341
  "p":0.0,
1342
  "r":0.0,
1343
  "f":0.0
1344
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1345
  "nummod":{
1346
+ "p":0.7580645161,
1347
  "r":0.6911764706,
1348
+ "f":0.7230769231
1349
  },
1350
  "vocative":{
1351
+ "p":0.8225806452,
1352
+ "r":0.7391304348,
1353
+ "f":0.7786259542
1354
  },
1355
  "orphan":{
1356
+ "p":0.2413793103,
1357
+ "r":0.1627906977,
1358
+ "f":0.1944444444
1359
  },
1360
+ "appos":{
1361
+ "p":0.347107438,
1362
+ "r":0.2978723404,
1363
+ "f":0.320610687
1364
+ },
1365
+ "parataxis":{
1366
+ "p":0.3333333333,
1367
+ "r":0.15,
1368
+ "f":0.2068965517
1369
  },
1370
  "flat:name":{
1371
+ "p":0.7222222222,
1372
  "r":0.5909090909,
1373
+ "f":0.65
1374
+ },
1375
+ "dislocated":{
1376
+ "p":0.1428571429,
1377
+ "r":0.0769230769,
1378
+ "f":0.1
1379
+ },
1380
+ "csubj:pass":{
1381
+ "p":0.0,
1382
+ "r":0.0,
1383
+ "f":0.0
1384
  },
1385
  "aux:pass":{
1386
  "p":0.0,
 
1388
  "f":0.0
1389
  },
1390
  "fixed":{
1391
+ "p":0.6666666667,
1392
+ "r":0.4,
1393
+ "f":0.5
1394
  },
1395
  "aux":{
1396
  "p":0.0,
 
1398
  "f":0.0
1399
  }
1400
  },
1401
+ "sents_p":0.919921875,
1402
+ "sents_r":0.9607343192,
1403
+ "sents_f":0.9398852582,
1404
+ "lemma_acc":0.9644196033,
1405
+ "ents_f":0.845430613,
1406
+ "ents_p":0.8419475655,
1407
+ "ents_r":0.8489425982,
1408
  "ents_per_type":{
 
 
 
 
 
1409
  "LOC":{
1410
+ "p":0.7592592593,
1411
+ "r":0.7387387387,
1412
+ "f":0.7488584475
1413
  },
1414
  "NORP":{
1415
+ "p":0.8790123457,
1416
+ "r":0.8557692308,
1417
+ "f":0.8672350792
1418
  },
1419
  "GOD":{
1420
+ "p":0.7254901961,
1421
+ "r":0.6981132075,
1422
+ "f":0.7115384615
1423
+ },
1424
+ "PERSON":{
1425
+ "p":0.8569254186,
1426
+ "r":0.9051446945,
1427
+ "f":0.8803752932
1428
+ },
1429
+ "GPE":{
1430
+ "p":0.6,
1431
+ "r":0.75,
1432
+ "f":0.6666666667
1433
+ },
1434
+ "ORG":{
1435
+ "p":1.0,
1436
+ "r":0.25,
1437
+ "f":0.4
1438
+ },
1439
+ "LANGUAGE":{
1440
+ "p":0.0,
1441
+ "r":0.0,
1442
+ "f":0.0
1443
+ },
1444
+ "WORK":{
1445
+ "p":0.0,
1446
+ "r":0.0,
1447
+ "f":0.0
1448
  }
1449
  },
1450
+ "tok2vec_loss":218125.6179342582,
1451
+ "morphologizer_loss":10762.2980411053,
1452
+ "tagger_loss":3213.6394980699,
1453
+ "parser_loss":71012.6608801985,
1454
+ "ner_loss":782.5167711947
1455
  },
1456
  "requirements":[
1457
 
morphologizer/cfg CHANGED
@@ -27,6 +27,7 @@
27
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
28
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
29
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
 
30
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
31
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
32
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
@@ -709,6 +710,7 @@
709
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
710
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
711
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
 
712
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
713
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
714
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
@@ -1058,6 +1060,7 @@
1058
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
1059
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
1060
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
 
1061
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
1062
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
1063
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
@@ -1740,6 +1743,7 @@
1740
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
1741
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
1742
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
 
1743
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
1744
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
1745
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
 
27
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem|Number=Sing",
28
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin|Voice=Act",
29
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":"Case=Dat|Gender=Masc|Number=Plur|PronType=Rcp",
30
+ "POS=PUNCT":"",
31
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":"Case=Nom|Definite=Def|Gender=Masc|Number=Plur|PronType=Dem",
32
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":"Case=Nom|Gender=Masc|Number=Plur",
33
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":"Case=Acc|Gender=Masc|Number=Plur",
 
710
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":"Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act",
711
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":"Aspect=Perf|Mood=Sub|Number=Plur|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid",
712
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":"Case=Dat|Degree=Sup|Gender=Fem|Number=Sing",
713
+ "POS=AUX":"",
714
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":"Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass",
715
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Acc|Gender=Fem,Masc|Number=Sing",
716
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":"Case=Dat|Gender=Fem,Masc|Number=Sing",
 
1060
  "Case=Acc|Gender=Fem|Number=Sing|POS=NOUN":92,
1061
  "Aspect=Perf|Mood=Ind|Number=Plur|POS=VERB|Person=3|Tense=Past|VerbForm=Fin|Voice=Act":100,
1062
  "Case=Dat|Gender=Masc|Number=Plur|POS=PRON|PronType=Rcp":95,
1063
+ "POS=PUNCT":97,
1064
  "Case=Nom|Definite=Def|Gender=Masc|Number=Plur|POS=DET|PronType=Dem":90,
1065
  "Case=Nom|Gender=Masc|Number=Plur|POS=NOUN":92,
1066
  "Case=Acc|Gender=Masc|Number=Plur|POS=NOUN":92,
 
1743
  "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|POS=VERB|Tense=Past|VerbForm=Part|Voice=Act":100,
1744
  "Aspect=Perf|Mood=Sub|Number=Plur|POS=VERB|Person=2|Tense=Past|VerbForm=Fin|Voice=Mid":100,
1745
  "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|POS=ADJ":84,
1746
+ "POS=AUX":87,
1747
  "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|POS=VERB|Tense=Past|VerbForm=Part|Voice=Pass":100,
1748
  "Case=Acc|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
1749
  "Case=Dat|Gender=Fem,Masc|Number=Sing|POS=NOUN":92,
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af7e7364191fb5c23ae55ba167d849199ad927f7cd62226a750326f1953c7a0
3
- size 1058262
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab750530616a1813faeeb4c5036d869a10878dc53091ffcd6d9a609c031d1ee0
3
+ size 1060318
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�{"0":{},"1":{"PERSON":626,"NORP":479,"LOC":257,"GOD":44,"LANGUAGE":4},"2":{"PERSON":626,"NORP":479,"LOC":257,"GOD":44,"LANGUAGE":4},"3":{"PERSON":626,"NORP":479,"LOC":257,"GOD":44,"LANGUAGE":4},"4":{"PERSON":626,"NORP":479,"LOC":257,"GOD":44,"LANGUAGE":4,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves��{"0":{},"1":{"PERSON":2290,"NORP":1501,"LOC":790,"GOD":250,"GPE":35,"LANGUAGE":12,"ORG":5,"WORK":2,"EVENT":1},"2":{"PERSON":2290,"NORP":1501,"LOC":790,"GOD":250,"GPE":35,"LANGUAGE":12,"ORG":5,"WORK":2,"EVENT":1},"3":{"PERSON":2290,"NORP":1501,"LOC":790,"GOD":250,"GPE":35,"LANGUAGE":12,"ORG":5,"WORK":2,"EVENT":1},"4":{"PERSON":2290,"NORP":1501,"LOC":790,"GOD":250,"GPE":35,"LANGUAGE":12,"ORG":5,"WORK":2,"EVENT":1,"":1},"5":{"":1}}�cfg��neg_key�
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deee49680cd87b6e61a7bbfdecd12b113f2080d81ec4b849bf11a335a4be6950
3
- size 1782525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec878aeb06475d620879b5a26847f9c2318c9fe7cc87f91ad549e56c58a617cf
3
+ size 1783041
parser/moves CHANGED
@@ -1 +1 @@
1
- ��moves�:{"0":{"":99376},"1":{"":72645},"2":{"det":25097,"case":13178,"advmod":8281,"nsubj":8155,"discourse":7820,"advcl":5178,"obj":4646,"obl":4348,"mark":3054,"cc":2782,"iobj":1765,"cop":1464,"nmod":1391,"amod":1350,"nsubj:pass":966,"det||nsubj":961,"xcomp":917,"vocative":752,"nummod":549,"mark||advcl":409,"case||obl":395,"obj||advcl":366,"det||obj":353,"dislocated":261,"acl":254,"obl||advcl":245,"orphan":206,"nmod||nsubj":192,"nsubj||advcl":175,"ccomp":161,"nsubj||ccomp":158,"det||nsubj:pass":140,"advmod||advcl":136,"obj||xcomp":132,"obl:agent":126,"cc||advcl":121,"conj||advcl":118,"det||obl":115,"nmod||obj":108,"parataxis":106,"det||iobj":94,"amod||obj":88,"det||nmod":79,"xcomp||advcl":77,"amod||nsubj":75,"obj||ccomp":71,"iobj||advcl":70,"obl||xcomp":64,"iobj||xcomp":64,"advmod||xcomp":55,"advmod||ccomp":49,"appos||nsubj":47,"obl||ccomp":45,"ccomp||advcl":44,"det||advmod":42,"cc||nsubj":42,"nmod||obl":41,"advmod||advmod":39,"nsubj:pass||advcl":34,"iobj||ccomp":34,"amod||obl":30,"dep":0},"3":{"conj":8819,"cc":8497,"obl":7446,"obj":6636,"nmod":5543,"nsubj":3918,"advcl":3876,"det":3859,"iobj":3825,"xcomp":2226,"ccomp":2144,"discourse":2105,"advmod":1915,"appos":1627,"acl":1443,"amod":1361,"cop":1355,"nsubj:pass":710,"orphan":452,"obl:agent":273,"flat:name":248,"vocative":243,"nummod":240,"acl||obj":174,"acl||nsubj":152,"fixed":148,"appos||nsubj":120,"csubj:pass":118,"nmod||obj":116,"conj||nsubj":113,"parataxis":110,"cc||nsubj":110,"nmod||nsubj":102,"conj||obj":101,"cc||obj":92,"appos||obj":84,"amod||obj":79,"case":69,"conj||obl":65,"cc||obl":63,"amod||nsubj":63,"dislocated":59,"det||obj":54,"acl||obl":53,"appos||obl":48,"conj||xcomp":46,"det||nsubj":45,"cop||xcomp":40,"iobj||xcomp":38,"conj||nmod":37,"obl||xcomp":35,"conj||iobj":35,"cc||nmod":35,"cop||ccomp":34,"cc||iobj":33,"cc||xcomp":32,"dep":0},"4":{"":20653,"ROOT":15014}}�cfg��neg_key�
 
1
+ ��moves�H{"0":{"":99376},"1":{"":87659},"2":{"det":25097,"case":13178,"advmod":8281,"nsubj":8155,"discourse":7820,"advcl":5178,"obj":4646,"obl":4348,"mark":3054,"cc":2782,"iobj":1765,"cop":1464,"nmod":1391,"amod":1350,"nsubj:pass":966,"det||nsubj":961,"xcomp":917,"vocative":752,"nummod":549,"mark||advcl":409,"case||obl":395,"obj||advcl":366,"det||obj":353,"dislocated":261,"acl":254,"obl||advcl":245,"orphan":206,"nmod||nsubj":192,"nsubj||advcl":175,"ccomp":161,"nsubj||ccomp":158,"det||nsubj:pass":140,"advmod||advcl":136,"obj||xcomp":132,"obl:agent":126,"cc||advcl":121,"conj||advcl":118,"det||obl":115,"nmod||obj":108,"parataxis":106,"det||iobj":94,"amod||obj":88,"det||nmod":79,"xcomp||advcl":77,"amod||nsubj":75,"obj||ccomp":71,"iobj||advcl":70,"obl||xcomp":64,"iobj||xcomp":64,"advmod||xcomp":55,"advmod||ccomp":49,"appos||nsubj":47,"obl||ccomp":45,"ccomp||advcl":44,"det||advmod":42,"cc||nsubj":42,"nmod||obl":41,"advmod||advmod":39,"nsubj:pass||advcl":34,"iobj||ccomp":34,"amod||obl":30,"dep":0},"3":{"punct":15014,"conj":8819,"cc":8497,"obl":7446,"obj":6636,"nmod":5543,"nsubj":3918,"advcl":3876,"det":3859,"iobj":3825,"xcomp":2226,"ccomp":2144,"discourse":2105,"advmod":1915,"appos":1627,"acl":1443,"amod":1361,"cop":1355,"nsubj:pass":710,"orphan":452,"obl:agent":273,"flat:name":248,"vocative":243,"nummod":240,"acl||obj":174,"acl||nsubj":152,"fixed":148,"appos||nsubj":120,"csubj:pass":118,"nmod||obj":116,"conj||nsubj":113,"parataxis":110,"cc||nsubj":110,"nmod||nsubj":102,"conj||obj":101,"cc||obj":92,"appos||obj":84,"amod||obj":79,"case":69,"conj||obl":65,"cc||obl":63,"amod||nsubj":63,"dislocated":59,"det||obj":54,"acl||obl":53,"appos||obl":48,"conj||xcomp":46,"det||nsubj":45,"cop||xcomp":40,"iobj||xcomp":38,"conj||nmod":37,"obl||xcomp":35,"conj||iobj":35,"cc||nmod":35,"cop||ccomp":34,"cc||iobj":33,"cc||xcomp":32,"dep":0},"4":{"":67143,"ROOT":15014}}�cfg��neg_key�
tagger/cfg CHANGED
@@ -23,7 +23,8 @@
23
  "Px",
24
  "R-",
25
  "S-",
26
- "V-"
 
27
  ],
28
  "neg_prefix":"!",
29
  "overwrite":false
 
23
  "Px",
24
  "R-",
25
  "S-",
26
+ "V-",
27
+ "Z"
28
  ],
29
  "neg_prefix":"!",
30
  "overwrite":false
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fa69d9b5461f3a6ea11b50a5592d613c61211c31d63c3803cc805415b26531b
3
  size 34875837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7296c46abe5e9c738e61716416bc2251a1639dae25ffb58893392a298410e22b
3
  size 34875837
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2b94f06e138f6c6f1aa4755663eb16ee57cfd667c47ac056efea6c745925bd0
3
- size 18291136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:561a327b17deb9ca189328936bf9a0cf4d3dbac668e53cb63a4e429557d5b5d6
3
+ size 22818322