Commit
·
aa2ea32
1
Parent(s):
92f0b14
multi-syllable words
Browse files
maker.py
CHANGED
@@ -21,36 +21,22 @@ class UDgoeswithDataset(object):
|
|
21 |
if len(t)==10 and t[0].isdecimal():
|
22 |
c.append(t)
|
23 |
elif c!=[]:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
for
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
v=tokenizer([t[1].replace("_"," ") for t in c],add_special_tokens=False)["input_ids"]
|
41 |
-
for i in range(len(v)-1,-1,-1):
|
42 |
-
for j in range(1,len(v[i])):
|
43 |
-
c.insert(i+1,[c[i][0],"_","_","X","_","_",c[i][0],"goeswith","_","_"])
|
44 |
-
y=["0"]+[t[0] for t in c]
|
45 |
-
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
46 |
-
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
47 |
-
if len(v)<tokenizer.model_max_length-3:
|
48 |
-
self.ids.append([cls]+v+[sep])
|
49 |
-
self.tags.append([dep]+p+[dep])
|
50 |
-
label=set(sum([self.tags[-1],list(label)],[]))
|
51 |
-
for i,k in enumerate(v):
|
52 |
-
self.ids.append([cls]+v[0:i]+[msk]+v[i+1:]+[sep,k])
|
53 |
-
self.tags.append([dep]+[t if h[j]==i+1 else dep for j,t in enumerate(p)]+[dep,dep])
|
54 |
c=[]
|
55 |
self.label2id={l:i for i,l in enumerate(sorted(label))}
|
56 |
def __call__(*args):
|
|
|
21 |
if len(t)==10 and t[0].isdecimal():
|
22 |
c.append(t)
|
23 |
elif c!=[]:
|
24 |
+
for x in [lambda i:i.replace(" ","_"),lambda i:i.replace("_"," ")]:
|
25 |
+
d=list(c)
|
26 |
+
v=tokenizer([x(t[1]) for t in d],add_special_tokens=False)["input_ids"]
|
27 |
+
for i in range(len(v)-1,-1,-1):
|
28 |
+
for j in range(1,len(v[i])):
|
29 |
+
d.insert(i+1,[d[i][0],"_","_","X","_","_",d[i][0],"goeswith","_","_"])
|
30 |
+
y=["0"]+[t[0] for t in d]
|
31 |
+
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(d,1)]
|
32 |
+
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in d],sum(v,[])
|
33 |
+
if len(v)<tokenizer.model_max_length-3:
|
34 |
+
self.ids.append([cls]+v+[sep])
|
35 |
+
self.tags.append([dep]+p+[dep])
|
36 |
+
label=set(sum([self.tags[-1],list(label)],[]))
|
37 |
+
for i,k in enumerate(v):
|
38 |
+
self.ids.append([cls]+v[0:i]+[msk]+v[i+1:]+[sep,k])
|
39 |
+
self.tags.append([dep]+[t if h[j]==i+1 else dep for j,t in enumerate(p)]+[dep,dep])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
c=[]
|
41 |
self.label2id={l:i for i,l in enumerate(sorted(label))}
|
42 |
def __call__(*args):
|