use all pairs for fine-tuning, seq len 128
Browse files
- finetune.py +6 -7
- pytorch_model.bin +1 -1
- pytorch_model.onnx +1 -1
finetune.py
CHANGED
@@ -12,7 +12,7 @@ import gzip
|
|
12 |
|
13 |
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
|
14 |
|
15 |
-
train_batch_size =
|
16 |
max_seq_length = 128
|
17 |
num_epochs = 1
|
18 |
warmup_steps = 1000
|
@@ -25,13 +25,12 @@ class ESCIDataset(Dataset):
|
|
25 |
with gzip.open(input) as jsonfile:
|
26 |
for line in jsonfile.readlines():
|
27 |
query = json.loads(line)
|
28 |
-
|
29 |
-
|
30 |
-
p = random.choice(query['e'])
|
31 |
positive = p['title']
|
32 |
-
n
|
33 |
-
|
34 |
-
|
35 |
|
36 |
def __getitem__(self, item):
|
37 |
return self.queries[item]
|
|
|
12 |
|
13 |
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
|
14 |
|
15 |
+
train_batch_size = 100
|
16 |
max_seq_length = 128
|
17 |
num_epochs = 1
|
18 |
warmup_steps = 1000
|
|
|
25 |
with gzip.open(input) as jsonfile:
|
26 |
for line in jsonfile.readlines():
|
27 |
query = json.loads(line)
|
28 |
+
if len(query['e']) > 0 and len(query['i']) > 0:
|
29 |
+
for p in query['e']:
|
|
|
30 |
positive = p['title']
|
31 |
+
for n in query['i']:
|
32 |
+
negative = p['title']
|
33 |
+
self.queries.append(InputExample(texts=[query['query'], positive, negative]))
|
34 |
|
35 |
def __getitem__(self, item):
|
36 |
return self.queries[item]
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90891565
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4f93afe73f1a6e9b5b7d8ddae92a900d5c02208e215866f340aac1312d0d0e3
|
3 |
size 90891565
|
pytorch_model.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90984263
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93063c8b319d7875dae261c2291d4a955f4025831b3bdced74fc6a5c7e2ee3da
|
3 |
size 90984263
|