fed-lora / examples /NLG /src /format_converting_dart.py
FZH1996
update fed-lora
e7d695a
# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import sys
import io
import json
def _linearize_triples(tripleset):
    """Render one example's triples as a single source string.

    Each triple is unpacked as ``(subject, relation, object)``; the relation
    is lower-cased and the three parts are joined with ``' : '``. Triples
    are separated by ``' | '``. An empty ``tripleset`` yields ``''``.
    """
    return ' | '.join(
        '{} : {} : {}'.format(subj, rela.lower(), obj)
        for subj, rela, obj in tripleset
    )


def _build_records(examples):
    """Expand examples into one record per (source, annotation) pair.

    Args:
        examples: iterable of dicts, each with a ``'tripleset'`` entry
            (sequence of 3-item triples) and an ``'annotations'`` entry
            (sequence of dicts carrying a ``'text'`` key).

    Returns:
        ``(records, unique_src)`` where ``records`` is a list of
        ``{'context': ..., 'completion': ...}`` dicts in input order and
        ``unique_src`` is the number of examples seen.
    """
    records = []
    unique_src = 0
    for example in examples:
        context = _linearize_triples(example['tripleset'])
        unique_src += 1
        # The same linearized source is repeated for every reference text.
        records.extend(
            {'context': context, 'completion': sent['text']}
            for sent in example['annotations']
        )
    return records, unique_src


def main(argv=None):
    """Convert a JSON file of triple-set examples into JSON-lines records.

    Args:
        argv: argument vector laid out like ``sys.argv`` (program name,
            input path, output path). Defaults to ``sys.argv``.

    Exits with a usage message when either path is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: {} <input.json> <output.jsonl>'.format(
            argv[0] if argv else 'format_converting_dart.py'))

    # Parse the input completely before touching the output file, so a bad
    # input (or identical input/output paths) cannot truncate existing data.
    with open(argv[1], 'r', encoding='utf8') as reader:
        examples = json.load(reader)

    records, unique_src = _build_records(examples)
    print('unique source is', unique_src)

    with open(argv[2], 'w', encoding='utf8') as writer:
        for record in records:
            writer.write(json.dumps(record) + '\n')


if __name__ == '__main__':
    main()