# SpacyModelCreator / utils / anoter_to_json.py
# WebashalarForML's picture
# Upload 4 files
# 2f2758d verified
# raw
# history blame
# 3.2 kB
# import json
# def convert_to_spacy_format(json_data):
# spacy_data = []
# # Iterate over the annotations in the input JSON
# for annotation in json_data['annotations']:
# text = annotation[0] # The text is the first element in each annotation
# entities = annotation[1]['entities'] # The entities are in the second element under 'entities'
# spacy_entities = []
# for entity in entities:
# start, end, label = entity
# spacy_entities.append((start, end, label))
# # Append the converted data in the desired format (like B.json)
# spacy_data.append([text, {'entities': spacy_entities}])
# return spacy_data
# # Load your JSON data from 'A.json'
# json_file_path = './JSON/Row_Json_Data.json'
# with open(json_file_path, 'r', encoding='utf-8') as file:
# json_data = json.load(file)
# # Convert the JSON data to the desired format
# spacy_formatted_data = convert_to_spacy_format(json_data)
# # Save the converted data to 'B.json'
# output_file_path = './data/Json_Data.json'
# with open(output_file_path, 'w', encoding='utf-8') as outfile:
# json.dump(spacy_formatted_data, outfile, ensure_ascii=False, indent=4)
# print(f'Successfully converted and saved the data to {output_file_path}')
import json
import os
def convert_to_spacy_format(json_data):
    """Convert annotation-tool JSON into a list of spaCy-style training rows.

    ``json_data['annotations']`` is read as a sequence of items where
    element 0 is the text and element 1 is a mapping holding an
    ``'entities'`` list of ``(start, end, label)`` triples.

    Returns a list of ``[text, {'entities': [(start, end, label), ...]}]``
    rows, in the same order as the input annotations.
    """
    return [
        [
            item[0],
            {
                # Unpacking into exactly three names keeps the strictness of
                # the triple format: a malformed entity raises ValueError.
                'entities': [
                    (begin, finish, tag)
                    for begin, finish, tag in item[1]['entities']
                ]
            },
        ]
        for item in json_data['annotations']
    ]
def process_uploaded_json(file_path, output_file_path='./data/Json_Data.json'):
    """Convert an uploaded annotation file and append it to the dataset file.

    The JSON at ``file_path`` is loaded, converted with
    ``convert_to_spacy_format`` and appended to the JSON list stored at
    ``output_file_path``; the merged list is then written back.

    Args:
        file_path: Path of the uploaded annotation JSON file.
        output_file_path: Path of the accumulated dataset file. Defaults to
            ``'./data/Json_Data.json'``, the location used previously.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the uploaded file, or a non-empty existing
            output file, is not valid JSON. The output file is left
            untouched in that case.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    spacy_formatted_data = convert_to_spacy_format(json_data)

    # EAFP instead of os.path.exists(): avoids the check-then-open race.
    try:
        with open(output_file_path, 'r', encoding='utf-8') as existing_file:
            existing_text = existing_file.read()
    except FileNotFoundError:
        existing_data = []
    else:
        # An empty (e.g. freshly touched) file is treated as "no data yet"
        # rather than crashing the JSON parser. Genuinely corrupt content
        # still raises so it is never silently overwritten.
        existing_data = json.loads(existing_text) if existing_text.strip() else []

    existing_data.extend(spacy_formatted_data)

    # Make sure the target directory exists; on a fresh checkout './data'
    # may be missing and the write below would otherwise fail.
    output_dir = os.path.dirname(output_file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(existing_data, outfile, ensure_ascii=False, indent=4)

    print(f'Successfully appended the new data to {output_file_path}')