{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "ced6196b-109c-489a-ab48-e65829ac6796",
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, RobertaForTokenClassification\n",
"import torch\n",
"from transformers import AutoTokenizer\n",
"from json import JSONEncoder\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "89c25949-7688-45f5-be0b-5bfe88bdbc05",
"metadata": {},
"outputs": [],
"source": [
"text=\"\"\"So, if you're a NASA scientist, you should be able to tell me the whole story about the Face On Mars, which obviously is evidence that there is life on Mars, and that the face was created by aliens, correct?\" No, twenty five years ago, our Viking 1 spacecraft was circling the planet, snapping photos, when it spotted the shadowy likeness of a human face. Us scientists figured out that it was just another Martian mesa, common around Cydonia, only this one had shadows that made it look like an Egyption Pharaoh. Very few days later, we revealed the image for all to see, and we made sure to note that it was a huge rock formation that just resembled a human head and face, but all of it was formed by shadows. We only announced it because we thought it would be a good way to engage the public with NASA's findings, and atrract attention to Mars-- and it did.\n",
"\n",
"The face on Mars soon became a pop icon; shot in movies, appeared in books, magazines, radio talk shows, and haunted grocery store checkout lines for 25 years. Some people thought the natural landform was evidence of life on Mars, and that us scientists wanted to hide it, but really, the defenders of the NASA budget wish there was ancient civilization on Mars. We decided to take another shot just to make sure we weren't wrong, on April 5, 1998. Michael Malin and his Mars Orbiter camera team took a picture that was ten times sharper than the original Viking photos, revealing a natural landform, which meant no alien monument. \"But that picture wasn't very clear at all, which could mean alien markings were hidden by haze\" Well no, yes that rumor started, but to prove them wrong on April 8, 2001 we decided to take another picture, making sure it was a cloudless summer day. Malin's team captured an amazing photo using the camera's absolute maximum revolution. With this camera you can discern things in a digital image, 3 times bigger than the pixel size which means if there were any signs of life, you could easily see what they were. What the picture showed was the butte or mesa, which are landforms common around the American West.\"\"\""
]
},
{
"cell_type": "markdown",
"id": "58c6201e-6dd3-4b80-b2de-4d91ca409b50",
"metadata": {},
"source": [
"## Procesar json de salidas de modelos"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "442f07fd-dbaf-4f52-8d5c-a8cbd09efdf2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"def obtener_dataframe(data):\n",
" \n",
" \n",
" data_flattened = [flatten_json(class_info) for class_info in data]\n",
" df = pd.DataFrame(data_flattened)\n",
" \n",
" return df\n",
" ###\n",
" ### funcion \"flatten_json\" tomada de https://levelup.gitconnected.com/a-deep-dive-into-nested-json-to-data-frame-with-python-69bdabb41938 \n",
" ### Renu Khandelwal Jul 23, 2023\n",
"def flatten_json(y):\n",
" try:\n",
" out = {}\n",
" \n",
" def flatten(x, name=''):\n",
" if type(x) is dict:\n",
" for a in x:\n",
" flatten(x[a], name + a + '_')\n",
" elif type(x) is list:\n",
" i = 0\n",
" for a in x:\n",
" flatten(a, name + str(i) + '_')\n",
" i += 1\n",
" else:\n",
" out[name[:-1]] = x\n",
" \n",
" flatten(y)\n",
" return out\n",
" except json.JSONDecodeError:\n",
" print(\"Error: The JSON document could not be decoded.\")\n",
" except TypeError:\n",
" print(\"Error: Invalid operation or function argument type.\")\n",
" except KeyError:\n",
" print(\"Error: One or more keys do not exist.\")\n",
" except ValueError:\n",
" print(\"Error: Invalid value detected.\")\n",
" except Exception as e:\n",
" # Catch any other exceptions\n",
" print(f\"An unexpected error occurred: {str(e)}\") \n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "6638783a-4b70-4ca4-8b63-fee3c38a090d",
"metadata": {},
"source": [
"## 1 FacebookAI/xlm-roberta-large-finetuned-conll03-english"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "eafceadf-dfa5-4b5b-b03a-c63abf6e81d0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at FacebookAI/xlm-roberta-large-finetuned-conll03-english were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
"- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss: 0.0\n"
]
}
],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-english\")\n",
"tokens = tokenizer.tokenize(text)\n",
"ids = tokenizer.convert_tokens_to_ids(tokens)\n",
"input_ids = torch.tensor([ids])\n",
"model_args={\"trust_remote_code\": True}\n",
"model = AutoModelForTokenClassification.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-english\", **model_args)\n",
"with torch.no_grad():\n",
" logits = model(input_ids).logits\n",
"predicted_token_class_ids = logits.argmax(-1)\n",
"\n",
"predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]\n",
"\n",
"labels = predicted_token_class_ids\n",
"loss = model(input_ids, labels=labels).loss\n",
"print('loss:',round(loss.item(), 2))\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "9efe9516-d6c8-4242-bb2c-aeadd7f41c1d",
"metadata": {},
"source": [
"## Metricas FacebookAI/xlm-roberta-large-finetuned-conll03-english"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "b9c3289c-08e9-4546-81a1-324d29bb1989",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at FacebookAI/xlm-roberta-large-finetuned-conll03-english were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
"- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-ORG', 'score': 0.9999913, 'index': 8, 'word': '▁NASA', 'start': 16, 'end': 20}, {'entity': 'I-MISC', 'score': 0.9999641, 'index': 23, 'word': '▁Face', 'start': 88, 'end': 92}, {'entity': 'I-MISC', 'score': 0.99989665, 'index': 24, 'word': '▁On', 'start': 93, 'end': 95}, {'entity': 'I-MISC', 'score': 0.97350365, 'index': 25, 'word': '▁Mars', 'start': 96, 'end': 100}, {'entity': 'I-LOC', 'score': 0.9999362, 'index': 36, 'word': '▁Mars', 'start': 152, 'end': 156}, {'entity': 'I-MISC', 'score': 0.9992086, 'index': 58, 'word': '▁Viking', 'start': 240, 'end': 246}, {'entity': 'I-MISC', 'score': 0.9989502, 'index': 59, 'word': '▁1', 'start': 247, 'end': 248}, {'entity': 'I-MISC', 'score': 0.999977, 'index': 97, 'word': '▁Marti', 'start': 407, 'end': 412}, {'entity': 'I-MISC', 'score': 0.99619055, 'index': 98, 'word': 'an', 'start': 412, 'end': 414}, {'entity': 'I-LOC', 'score': 0.9999354, 'index': 103, 'word': '▁Cy', 'start': 435, 'end': 437}, {'entity': 'I-LOC', 'score': 0.99994576, 'index': 104, 'word': 'do', 'start': 437, 'end': 439}, {'entity': 'I-LOC', 'score': 0.99992585, 'index': 105, 'word': 'nia', 'start': 439, 'end': 442}, {'entity': 'I-MISC', 'score': 0.9999789, 'index': 119, 'word': '▁Egypt', 'start': 496, 'end': 501}, {'entity': 'I-MISC', 'score': 0.9614088, 'index': 120, 'word': 'ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.99997246, 'index': 193, 'word': '▁NASA', 'start': 801, 'end': 805}, {'entity': 'I-LOC', 'score': 0.99979633, 'index': 205, 'word': '▁Mars', 'start': 843, 'end': 847}, {'entity': 'I-LOC', 'score': 0.9998061, 'index': 215, 'word': '▁Mars', 'start': 875, 'end': 879}, {'entity': 'I-LOC', 'score': 0.99984956, 'index': 264, 'word': '▁Mars', 'start': 1088, 'end': 1092}, {'entity': 'I-ORG', 'score': 0.99996305, 'index': 285, 'word': '▁NASA', 'start': 1169, 'end': 1173}, {'entity': 'I-LOC', 'score': 0.9998203, 'index': 295, 'word': '▁Mars', 'start': 1220, 'end': 1224}, {'entity': 'I-PER', 'score': 0.9999932, 
'index': 319, 'word': '▁Michael', 'start': 1312, 'end': 1319}, {'entity': 'I-PER', 'score': 0.99999106, 'index': 320, 'word': '▁Malin', 'start': 1320, 'end': 1325}, {'entity': 'I-MISC', 'score': 0.94105357, 'index': 323, 'word': '▁Mars', 'start': 1334, 'end': 1338}, {'entity': 'I-MISC', 'score': 0.9839579, 'index': 324, 'word': '▁Or', 'start': 1339, 'end': 1341}, {'entity': 'I-MISC', 'score': 0.9913346, 'index': 325, 'word': 'bit', 'start': 1341, 'end': 1344}, {'entity': 'I-MISC', 'score': 0.9759228, 'index': 326, 'word': 'er', 'start': 1344, 'end': 1346}, {'entity': 'I-MISC', 'score': 0.999749, 'index': 341, 'word': '▁Viking', 'start': 1419, 'end': 1425}, {'entity': 'I-PER', 'score': 0.9999914, 'index': 416, 'word': '▁Malin', 'start': 1745, 'end': 1750}, {'entity': 'I-MISC', 'score': 0.92417294, 'index': 491, 'word': '▁American', 'start': 2094, 'end': 2102}, {'entity': 'I-LOC', 'score': 0.99954396, 'index': 492, 'word': '▁West', 'start': 2103, 'end': 2107}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-english\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-english\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "ef6050de-0b46-4477-be36-3e6132143cb2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"I-LOC 9\n",
"I-MISC 15\n",
"I-ORG 3\n",
"I-PER 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"I-LOC do 1\n",
" nia 1\n",
" ▁Cy 1\n",
" ▁Mars 5\n",
" ▁West 1\n",
"I-MISC an 1\n",
" bit 1\n",
" er 1\n",
" ion 1\n",
" ▁1 1\n",
" ▁American 1\n",
" ▁Egypt 1\n",
" ▁Face 1\n",
" ▁Mars 2\n",
" ▁Marti 1\n",
" ▁On 1\n",
" ▁Or 1\n",
" ▁Viking 2\n",
"I-ORG ▁NASA 3\n",
"I-PER ▁Malin 2\n",
" ▁Michael 1\n",
"dtype: int64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Create DataFrame from flattened JSON\n",
"with open(\"metricas_Facebook.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "9b975064-5919-4fe1-b266-8b9632d10e51",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'LOC': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 7},\n",
" 'MISC': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 7},\n",
" 'ORG': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 3},\n",
" 'PER': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 2},\n",
" 'overall_precision': 1.0,\n",
" 'overall_recall': 1.0,\n",
" 'overall_f1': 1.0,\n",
" 'overall_accuracy': 1.0}"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#pip install seqeval\n",
"from datasets import load_dataset, load_metric\n",
"metric = load_metric(\"seqeval\",**model_args)\n",
"metric.compute(predictions=[predicted_tokens_classes], references=[predicted_tokens_classes])"
]
},
{
"cell_type": "markdown",
"id": "188bdc60-76fe-44a4-9424-b69cc0044b2f",
"metadata": {},
"source": [
"## 2 manu/lilt-infoxlm-base"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "8d891893-0445-47d1-aa06-8674ce5d8cce",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `liltrobertalike` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'liltrobertalike'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[52], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForMaskedLM\n\u001b[1;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForMaskedLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmanu/lilt-infoxlm-base\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:523\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquantization_config\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 521\u001b[0m _ \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquantization_config\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 523\u001b[0m config, kwargs \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 524\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 525\u001b[0m return_unused_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 526\u001b[0m trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code,\n\u001b[0;32m 527\u001b[0m code_revision\u001b[38;5;241m=\u001b[39mcode_revision,\n\u001b[0;32m 528\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 529\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhub_kwargs,\n\u001b[0;32m 530\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 531\u001b[0m )\n\u001b[0;32m 533\u001b[0m \u001b[38;5;66;03m# if torch_dtype=auto was passed here, ensure to pass it on\u001b[39;00m\n\u001b[0;32m 534\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs_orig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch_dtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `liltrobertalike` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"from transformers import AutoModelForMaskedLM\n",
"model = AutoModelForMaskedLM.from_pretrained(\"manu/lilt-infoxlm-base\")"
]
},
{
"cell_type": "markdown",
"id": "2a918756-380f-438f-a544-a4a6767e6eee",
"metadata": {},
"source": [
"## 3 projecte-aina/DEBERTA_CIEL"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "00dc1b6a-7538-4a3e-abff-49b5d72321a5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity_group': 'GPE', 'score': 0.6340015, 'word': 'Cy', 'start': 434, 'end': 437}, {'entity_group': 'location-other', 'score': 0.20539406, 'word': 'donia', 'start': 437, 'end': 442}, {'entity_group': 'organization-other', 'score': 0.3824667, 'word': 'NASA', 'start': 1168, 'end': 1173}, {'entity_group': 'person-other', 'score': 0.62630403, 'word': 'Michael Malin', 'start': 1311, 'end': 1325}, {'entity_group': 'organization-other', 'score': 0.28832194, 'word': 'Mars', 'start': 1333, 'end': 1338}, {'entity_group': 'organization-privatecompany', 'score': 0.13149069, 'word': 'Orbiter', 'start': 1338, 'end': 1346}, {'entity_group': 'person-other', 'score': 0.27721033, 'word': 'Malin', 'start': 1744, 'end': 1750}, {'entity_group': 'location-other', 'score': 0.44634995, 'word': 'American', 'start': 2093, 'end': 2102}, {'entity_group': 'location-other', 'score': 0.18410492, 'word': 'West', 'start': 2102, 'end': 2107}]\n"
]
}
],
"source": [
"\n",
"from transformers import pipeline\n",
"\n",
"pipe = pipeline(\"ner\", model=\"projecte-aina/DEBERTA_CIEL\")\n",
"ner_entity_results = pipe(text, aggregation_strategy=\"simple\")\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "183cc325-3f42-4207-9d8c-18cf79db19ad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity_group\n",
"GPE 1\n",
"location-other 3\n",
"organization-other 2\n",
"organization-privatecompany 1\n",
"person-other 2\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity_group word \n",
"GPE Cy 1\n",
"location-other American 1\n",
" West 1\n",
" donia 1\n",
"organization-other Mars 1\n",
" NASA 1\n",
"organization-privatecompany Orbiter 1\n",
"person-other Malin 1\n",
" Michael Malin 1\n",
"dtype: int64"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create DataFrame from flattened JSON\n",
"with open(\"3 metricas projecte-ainaDEBERTA_CIEL.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity_group']).size())\n",
"aux.groupby(['entity_group', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "6ee85c10-af5e-475b-95f5-9b822eca4c04",
"metadata": {},
"source": [
"## 4 gunghio/distilbert-base-multilingual-cased-finetuned-conll2003-ner"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "3d3e7e83-4ed1-41b8-832c-9a544ef596e7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity_group': 'ORG', 'score': 0.98227155, 'word': 'NASA', 'start': 16, 'end': 20}, {'entity_group': 'MISC', 'score': 0.7845409, 'word': 'Face On Mars', 'start': 88, 'end': 100}, {'entity_group': 'LOC', 'score': 0.97792244, 'word': 'Mars', 'start': 152, 'end': 156}, {'entity_group': 'MISC', 'score': 0.9694425, 'word': 'Viking 1', 'start': 240, 'end': 248}, {'entity_group': 'MISC', 'score': 0.95397955, 'word': 'Martian', 'start': 407, 'end': 414}, {'entity_group': 'LOC', 'score': 0.92743087, 'word': 'Cydonia', 'start': 435, 'end': 442}, {'entity_group': 'MISC', 'score': 0.6778414, 'word': 'Egyption Ph', 'start': 496, 'end': 507}, {'entity_group': 'MISC', 'score': 0.6644676, 'word': '##oh', 'start': 510, 'end': 512}, {'entity_group': 'ORG', 'score': 0.9827271, 'word': 'NASA', 'start': 801, 'end': 805}, {'entity_group': 'LOC', 'score': 0.9836016, 'word': 'Mars', 'start': 843, 'end': 847}, {'entity_group': 'LOC', 'score': 0.97961295, 'word': 'Mars', 'start': 875, 'end': 879}, {'entity_group': 'LOC', 'score': 0.976, 'word': 'Mars', 'start': 1088, 'end': 1092}, {'entity_group': 'ORG', 'score': 0.9822379, 'word': 'NASA', 'start': 1169, 'end': 1173}, {'entity_group': 'LOC', 'score': 0.9719374, 'word': 'Mars', 'start': 1220, 'end': 1224}, {'entity_group': 'PER', 'score': 0.99495125, 'word': 'Michael Malin', 'start': 1312, 'end': 1325}, {'entity_group': 'ORG', 'score': 0.74125415, 'word': 'Mars Orbiter', 'start': 1334, 'end': 1346}, {'entity_group': 'MISC', 'score': 0.9728442, 'word': 'Viking', 'start': 1419, 'end': 1425}, {'entity_group': 'PER', 'score': 0.8149092, 'word': 'Malin', 'start': 1745, 'end': 1750}, {'entity_group': 'LOC', 'score': 0.9479703, 'word': 'American West', 'start': 2094, 'end': 2107}]\n"
]
}
],
"source": [
"from transformers import pipeline\n",
"\n",
"pipe = pipeline(\"token-classification\", model=\"gunghio/distilbert-base-multilingual-cased-finetuned-conll2003-ner\")\n",
"#pipe = pipeline(\"ner\", model=\"projecte-aina/DEBERTA_CIEL\")\n",
"ner_entity_results = pipe(text, aggregation_strategy=\"simple\")\n",
"print(ner_entity_results)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "ba56217a-23f9-482b-bcb8-9a5295972506",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity_group\n",
"LOC 7\n",
"MISC 6\n",
"ORG 4\n",
"PER 2\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity_group word \n",
"LOC American West 1\n",
" Cydonia 1\n",
" Mars 5\n",
"MISC ##oh 1\n",
" Egyption Ph 1\n",
" Face On Mars 1\n",
" Martian 1\n",
" Viking 1\n",
" Viking 1 1\n",
"ORG Mars Orbiter 1\n",
" NASA 3\n",
"PER Malin 1\n",
" Michael Malin 1\n",
"dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"4 metricas gunghio distilbert-base-multilingual-cased-finetuned-conll2003-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity_group']).size())\n",
"aux.groupby(['entity_group', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "2df8815c-41d5-412e-a650-8892c4fb3bdb",
"metadata": {},
"source": [
"## 5 mrm8488/distilbert-base-multi-cased-finetuned-typo-detection"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "a2162ff8-e760-433c-8266-9088bfe7764a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'ok',\n",
" 'score': 0.9418772,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'ok',\n",
" 'score': 0.9061194,\n",
" 'index': 3,\n",
" 'word': 'if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'ok',\n",
" 'score': 0.9935272,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'ok',\n",
" 'score': 0.9791702,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'ok',\n",
" 'score': 0.99081236,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'ok',\n",
" 'score': 0.979658,\n",
" 'index': 7,\n",
" 'word': 'a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'ok',\n",
" 'score': 0.7008045,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'ok',\n",
" 'score': 0.9583886,\n",
" 'index': 9,\n",
" 'word': 'scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'ok',\n",
" 'score': 0.9387937,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'typo',\n",
" 'score': 0.955443,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'typo',\n",
" 'score': 0.5262742,\n",
" 'index': 12,\n",
" 'word': 'should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'ok',\n",
" 'score': 0.943428,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'ok',\n",
" 'score': 0.9880336,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'ok',\n",
" 'score': 0.99831665,\n",
" 'index': 15,\n",
" 'word': 'to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'ok',\n",
" 'score': 0.57461846,\n",
" 'index': 16,\n",
" 'word': 'tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'ok',\n",
" 'score': 0.9945175,\n",
" 'index': 17,\n",
" 'word': 'me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'ok',\n",
" 'score': 0.810129,\n",
" 'index': 18,\n",
" 'word': 'the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'ok',\n",
" 'score': 0.5944688,\n",
" 'index': 19,\n",
" 'word': 'whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'ok',\n",
" 'score': 0.9751041,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'ok',\n",
" 'score': 0.8432575,\n",
" 'index': 21,\n",
" 'word': 'about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'typo',\n",
" 'score': 0.93727547,\n",
" 'index': 22,\n",
" 'word': 'the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'typo',\n",
" 'score': 0.9830071,\n",
" 'index': 23,\n",
" 'word': 'Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'ok',\n",
" 'score': 0.9957579,\n",
" 'index': 24,\n",
" 'word': 'On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'ok',\n",
" 'score': 0.98799103,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'typo',\n",
" 'score': 0.951169,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'typo',\n",
" 'score': 0.9631144,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'typo',\n",
" 'score': 0.99146336,\n",
" 'index': 28,\n",
" 'word': 'obvious',\n",
" 'start': 108,\n",
" 'end': 115},\n",
" {'entity': 'ok',\n",
" 'score': 0.9865329,\n",
" 'index': 29,\n",
" 'word': '##ly',\n",
" 'start': 115,\n",
" 'end': 117},\n",
" {'entity': 'ok',\n",
" 'score': 0.99190086,\n",
" 'index': 30,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'ok',\n",
" 'score': 0.87074775,\n",
" 'index': 31,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'ok',\n",
" 'score': 0.99500775,\n",
" 'index': 32,\n",
" 'word': 'that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'typo',\n",
" 'score': 0.94334555,\n",
" 'index': 33,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'ok',\n",
" 'score': 0.8655428,\n",
" 'index': 34,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'typo',\n",
" 'score': 0.5147766,\n",
" 'index': 35,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'ok',\n",
" 'score': 0.9896828,\n",
" 'index': 36,\n",
" 'word': 'on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'ok',\n",
" 'score': 0.94172686,\n",
" 'index': 37,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'typo',\n",
" 'score': 0.92494434,\n",
" 'index': 38,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'ok',\n",
" 'score': 0.9805345,\n",
" 'index': 39,\n",
" 'word': 'and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'ok',\n",
" 'score': 0.8247318,\n",
" 'index': 40,\n",
" 'word': 'that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'typo',\n",
" 'score': 0.8335082,\n",
" 'index': 41,\n",
" 'word': 'the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'typo',\n",
" 'score': 0.984109,\n",
" 'index': 42,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'ok',\n",
" 'score': 0.9346752,\n",
" 'index': 43,\n",
" 'word': 'was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'typo',\n",
" 'score': 0.61687183,\n",
" 'index': 44,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'ok',\n",
" 'score': 0.9703255,\n",
" 'index': 45,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'typo',\n",
" 'score': 0.6155792,\n",
" 'index': 46,\n",
" 'word': 'alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': 'ok',\n",
" 'score': 0.93030524,\n",
" 'index': 47,\n",
" 'word': '##s',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'typo',\n",
" 'score': 0.8034546,\n",
" 'index': 48,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'ok',\n",
" 'score': 0.8684226,\n",
" 'index': 49,\n",
" 'word': 'correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'ok',\n",
" 'score': 0.9899316,\n",
" 'index': 50,\n",
" 'word': '?',\n",
" 'start': 206,\n",
" 'end': 207},\n",
" {'entity': 'typo',\n",
" 'score': 0.68965435,\n",
" 'index': 51,\n",
" 'word': '\"',\n",
" 'start': 207,\n",
" 'end': 208},\n",
" {'entity': 'typo',\n",
" 'score': 0.5541892,\n",
" 'index': 52,\n",
" 'word': 'No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'ok',\n",
" 'score': 0.99887687,\n",
" 'index': 53,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'ok',\n",
" 'score': 0.9993892,\n",
" 'index': 54,\n",
" 'word': 'twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'ok',\n",
" 'score': 0.9983181,\n",
" 'index': 55,\n",
" 'word': 'five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'ok',\n",
" 'score': 0.9524137,\n",
" 'index': 56,\n",
" 'word': 'years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'ok',\n",
" 'score': 0.98393893,\n",
" 'index': 57,\n",
" 'word': 'ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'ok',\n",
" 'score': 0.877359,\n",
" 'index': 58,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'typo',\n",
" 'score': 0.8272593,\n",
" 'index': 59,\n",
" 'word': 'our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'ok',\n",
" 'score': 0.88372874,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'ok',\n",
" 'score': 0.99642414,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'ok',\n",
" 'score': 0.9792823,\n",
" 'index': 62,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'ok',\n",
" 'score': 0.7188466,\n",
" 'index': 63,\n",
" 'word': 'was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'ok',\n",
" 'score': 0.6053355,\n",
" 'index': 64,\n",
" 'word': 'ci',\n",
" 'start': 264,\n",
" 'end': 266},\n",
" {'entity': 'ok',\n",
" 'score': 0.98061955,\n",
" 'index': 65,\n",
" 'word': '##rc',\n",
" 'start': 266,\n",
" 'end': 268},\n",
" {'entity': 'ok',\n",
" 'score': 0.9918943,\n",
" 'index': 66,\n",
" 'word': '##ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'ok',\n",
" 'score': 0.9991246,\n",
" 'index': 67,\n",
" 'word': 'the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'ok',\n",
" 'score': 0.99520606,\n",
" 'index': 68,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'typo',\n",
" 'score': 0.9603083,\n",
" 'index': 69,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'typo',\n",
" 'score': 0.9765087,\n",
" 'index': 70,\n",
" 'word': 'sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'ok',\n",
" 'score': 0.9886219,\n",
" 'index': 71,\n",
" 'word': '##pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'ok',\n",
" 'score': 0.99900526,\n",
" 'index': 72,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'typo',\n",
" 'score': 0.8964089,\n",
" 'index': 73,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'typo',\n",
" 'score': 0.8211978,\n",
" 'index': 74,\n",
" 'word': 'when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'typo',\n",
" 'score': 0.9168602,\n",
" 'index': 75,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'ok',\n",
" 'score': 0.9347477,\n",
" 'index': 76,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'ok',\n",
" 'score': 0.9530431,\n",
" 'index': 77,\n",
" 'word': 'the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'typo',\n",
" 'score': 0.52226573,\n",
" 'index': 78,\n",
" 'word': 'sh',\n",
" 'start': 322,\n",
" 'end': 324},\n",
" {'entity': 'ok',\n",
" 'score': 0.98722374,\n",
" 'index': 79,\n",
" 'word': '##adow',\n",
" 'start': 324,\n",
" 'end': 328},\n",
" {'entity': 'ok',\n",
" 'score': 0.9884067,\n",
" 'index': 80,\n",
" 'word': '##y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'ok',\n",
" 'score': 0.99751353,\n",
" 'index': 81,\n",
" 'word': 'like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'ok',\n",
" 'score': 0.98541266,\n",
" 'index': 82,\n",
" 'word': '##ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'ok',\n",
" 'score': 0.99096996,\n",
" 'index': 83,\n",
" 'word': 'of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'ok',\n",
" 'score': 0.9794129,\n",
" 'index': 84,\n",
" 'word': 'a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'ok',\n",
" 'score': 0.9906974,\n",
" 'index': 85,\n",
" 'word': 'human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'ok',\n",
" 'score': 0.99601525,\n",
" 'index': 86,\n",
" 'word': 'face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'typo',\n",
" 'score': 0.80661726,\n",
" 'index': 87,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'typo',\n",
" 'score': 0.8332319,\n",
" 'index': 88,\n",
" 'word': 'Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'ok',\n",
" 'score': 0.9995962,\n",
" 'index': 89,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'typo',\n",
" 'score': 0.83959967,\n",
" 'index': 90,\n",
" 'word': 'figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'ok',\n",
" 'score': 0.99624974,\n",
" 'index': 91,\n",
" 'word': '##d',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'ok',\n",
" 'score': 0.9980217,\n",
" 'index': 92,\n",
" 'word': 'out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'ok',\n",
" 'score': 0.5464159,\n",
" 'index': 93,\n",
" 'word': 'that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'typo',\n",
" 'score': 0.95522094,\n",
" 'index': 94,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'ok',\n",
" 'score': 0.53848577,\n",
" 'index': 95,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'typo',\n",
" 'score': 0.9315185,\n",
" 'index': 96,\n",
" 'word': 'just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'ok',\n",
" 'score': 0.92040026,\n",
" 'index': 97,\n",
" 'word': 'another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'typo',\n",
" 'score': 0.5492458,\n",
" 'index': 98,\n",
" 'word': 'Mart',\n",
" 'start': 407,\n",
" 'end': 411},\n",
" {'entity': 'ok',\n",
" 'score': 0.91541255,\n",
" 'index': 99,\n",
" 'word': '##ian',\n",
" 'start': 411,\n",
" 'end': 414},\n",
" {'entity': 'typo',\n",
" 'score': 0.9870064,\n",
" 'index': 100,\n",
" 'word': 'mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'typo',\n",
" 'score': 0.9882908,\n",
" 'index': 101,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'typo',\n",
" 'score': 0.94008124,\n",
" 'index': 102,\n",
" 'word': 'common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'ok',\n",
" 'score': 0.99831474,\n",
" 'index': 103,\n",
" 'word': 'around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'ok',\n",
" 'score': 0.9985098,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'ok',\n",
" 'score': 0.95086056,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'ok',\n",
" 'score': 0.99512345,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'ok',\n",
" 'score': 0.74133,\n",
" 'index': 107,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'ok',\n",
" 'score': 0.976595,\n",
" 'index': 108,\n",
" 'word': 'only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'ok',\n",
" 'score': 0.999059,\n",
" 'index': 109,\n",
" 'word': 'this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'ok',\n",
" 'score': 0.9988238,\n",
" 'index': 110,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'ok',\n",
" 'score': 0.8208375,\n",
" 'index': 111,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'typo',\n",
" 'score': 0.70970184,\n",
" 'index': 112,\n",
" 'word': 'sh',\n",
" 'start': 462,\n",
" 'end': 464},\n",
" {'entity': 'ok',\n",
" 'score': 0.96421015,\n",
" 'index': 113,\n",
" 'word': '##adow',\n",
" 'start': 464,\n",
" 'end': 468},\n",
" {'entity': 'ok',\n",
" 'score': 0.9639605,\n",
" 'index': 114,\n",
" 'word': '##s',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'ok',\n",
" 'score': 0.90302604,\n",
" 'index': 115,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'ok',\n",
" 'score': 0.6680046,\n",
" 'index': 116,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'ok',\n",
" 'score': 0.9925667,\n",
" 'index': 117,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'typo',\n",
" 'score': 0.5095613,\n",
" 'index': 118,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'ok',\n",
" 'score': 0.9739361,\n",
" 'index': 119,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'ok',\n",
" 'score': 0.9018896,\n",
" 'index': 120,\n",
" 'word': 'an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'typo',\n",
" 'score': 0.5793097,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'ok',\n",
" 'score': 0.9924223,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'ok',\n",
" 'score': 0.9970651,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'ok',\n",
" 'score': 0.99351513,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'ok',\n",
" 'score': 0.98944503,\n",
" 'index': 125,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'typo',\n",
" 'score': 0.44638777,\n",
" 'index': 126,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'typo',\n",
" 'score': 0.9027144,\n",
" 'index': 127,\n",
" 'word': 'Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'ok',\n",
" 'score': 0.95206773,\n",
" 'index': 128,\n",
" 'word': 'few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'ok',\n",
" 'score': 0.88961726,\n",
" 'index': 129,\n",
" 'word': 'days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'ok',\n",
" 'score': 0.9765072,\n",
" 'index': 130,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'typo',\n",
" 'score': 0.884694,\n",
" 'index': 131,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'typo',\n",
" 'score': 0.9981382,\n",
" 'index': 132,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'typo',\n",
" 'score': 0.8012792,\n",
" 'index': 133,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'ok',\n",
" 'score': 0.91450936,\n",
" 'index': 134,\n",
" 'word': 'the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'typo',\n",
" 'score': 0.9744159,\n",
" 'index': 135,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'typo',\n",
" 'score': 0.9916164,\n",
" 'index': 136,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'typo',\n",
" 'score': 0.5908708,\n",
" 'index': 137,\n",
" 'word': 'all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'ok',\n",
" 'score': 0.99475324,\n",
" 'index': 138,\n",
" 'word': 'to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'ok',\n",
" 'score': 0.8057313,\n",
" 'index': 139,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'typo',\n",
" 'score': 0.61359376,\n",
" 'index': 140,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'ok',\n",
" 'score': 0.99861836,\n",
" 'index': 141,\n",
" 'word': 'and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'ok',\n",
" 'score': 0.6784175,\n",
" 'index': 142,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'ok',\n",
" 'score': 0.9465173,\n",
" 'index': 143,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'ok',\n",
" 'score': 0.9947103,\n",
" 'index': 144,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'ok',\n",
" 'score': 0.99907136,\n",
" 'index': 145,\n",
" 'word': 'to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'ok',\n",
" 'score': 0.98897797,\n",
" 'index': 146,\n",
" 'word': 'note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'ok',\n",
" 'score': 0.9916495,\n",
" 'index': 147,\n",
" 'word': 'that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'typo',\n",
" 'score': 0.8916498,\n",
" 'index': 148,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'ok',\n",
" 'score': 0.59980196,\n",
" 'index': 149,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'typo',\n",
" 'score': 0.6458891,\n",
" 'index': 150,\n",
" 'word': 'a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'ok',\n",
" 'score': 0.86706686,\n",
" 'index': 151,\n",
" 'word': 'huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'ok',\n",
" 'score': 0.74633974,\n",
" 'index': 152,\n",
" 'word': 'rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'ok',\n",
" 'score': 0.9891533,\n",
" 'index': 153,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'ok',\n",
" 'score': 0.8717077,\n",
" 'index': 154,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'typo',\n",
" 'score': 0.99484986,\n",
" 'index': 155,\n",
" 'word': 'just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'typo',\n",
" 'score': 0.5499501,\n",
" 'index': 156,\n",
" 'word': 'res',\n",
" 'start': 642,\n",
" 'end': 645},\n",
" {'entity': 'ok',\n",
" 'score': 0.89776593,\n",
" 'index': 157,\n",
" 'word': '##emble',\n",
" 'start': 645,\n",
" 'end': 650},\n",
" {'entity': 'ok',\n",
" 'score': 0.9908867,\n",
" 'index': 158,\n",
" 'word': '##d',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'ok',\n",
" 'score': 0.747778,\n",
" 'index': 159,\n",
" 'word': 'a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'ok',\n",
" 'score': 0.9308926,\n",
" 'index': 160,\n",
" 'word': 'human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'ok',\n",
" 'score': 0.8658663,\n",
" 'index': 161,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'ok',\n",
" 'score': 0.9994103,\n",
" 'index': 162,\n",
" 'word': 'and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'ok',\n",
" 'score': 0.99929607,\n",
" 'index': 163,\n",
" 'word': 'face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'ok',\n",
" 'score': 0.86389965,\n",
" 'index': 164,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'ok',\n",
" 'score': 0.99792165,\n",
" 'index': 165,\n",
" 'word': 'but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'ok',\n",
" 'score': 0.9912469,\n",
" 'index': 166,\n",
" 'word': 'all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'ok',\n",
" 'score': 0.9983467,\n",
" 'index': 167,\n",
" 'word': 'of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'ok',\n",
" 'score': 0.9972486,\n",
" 'index': 168,\n",
" 'word': 'it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'ok',\n",
" 'score': 0.99832314,\n",
" 'index': 169,\n",
" 'word': 'was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'ok',\n",
" 'score': 0.9965006,\n",
" 'index': 170,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'ok',\n",
" 'score': 0.9985544,\n",
" 'index': 171,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'ok',\n",
" 'score': 0.98051447,\n",
" 'index': 172,\n",
" 'word': 'sh',\n",
" 'start': 703,\n",
" 'end': 705},\n",
" {'entity': 'ok',\n",
" 'score': 0.9919695,\n",
" 'index': 173,\n",
" 'word': '##adow',\n",
" 'start': 705,\n",
" 'end': 709},\n",
" {'entity': 'ok',\n",
" 'score': 0.986326,\n",
" 'index': 174,\n",
" 'word': '##s',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'typo',\n",
" 'score': 0.79459554,\n",
" 'index': 175,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'typo',\n",
" 'score': 0.99913234,\n",
" 'index': 176,\n",
" 'word': 'We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'ok',\n",
" 'score': 0.9165677,\n",
" 'index': 177,\n",
" 'word': 'only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'ok',\n",
" 'score': 0.9994357,\n",
" 'index': 178,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'ok',\n",
" 'score': 0.9980812,\n",
" 'index': 179,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'ok',\n",
" 'score': 0.996609,\n",
" 'index': 180,\n",
" 'word': 'because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'ok',\n",
" 'score': 0.7698178,\n",
" 'index': 181,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'ok',\n",
" 'score': 0.9095254,\n",
" 'index': 182,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'ok',\n",
" 'score': 0.9074025,\n",
" 'index': 183,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'ok',\n",
" 'score': 0.94705737,\n",
" 'index': 184,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'ok',\n",
" 'score': 0.8691749,\n",
" 'index': 185,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'ok',\n",
" 'score': 0.7643121,\n",
" 'index': 186,\n",
" 'word': 'a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'ok',\n",
" 'score': 0.92725027,\n",
" 'index': 187,\n",
" 'word': 'good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'ok',\n",
" 'score': 0.98407435,\n",
" 'index': 188,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'ok',\n",
" 'score': 0.9118401,\n",
" 'index': 189,\n",
" 'word': 'to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'typo',\n",
" 'score': 0.80820084,\n",
" 'index': 190,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'ok',\n",
" 'score': 0.9221322,\n",
" 'index': 191,\n",
" 'word': 'the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'ok',\n",
" 'score': 0.8702925,\n",
" 'index': 192,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'ok',\n",
" 'score': 0.96217585,\n",
" 'index': 193,\n",
" 'word': 'with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'ok',\n",
" 'score': 0.5905169,\n",
" 'index': 194,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'ok',\n",
" 'score': 0.52509785,\n",
" 'index': 195,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'typo',\n",
" 'score': 0.62146723,\n",
" 'index': 196,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'typo',\n",
" 'score': 0.79807454,\n",
" 'index': 197,\n",
" 'word': 'findings',\n",
" 'start': 808,\n",
" 'end': 816},\n",
" {'entity': 'typo',\n",
" 'score': 0.99402726,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'ok',\n",
" 'score': 0.49941942,\n",
" 'index': 199,\n",
" 'word': 'and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'typo',\n",
" 'score': 0.9993143,\n",
" 'index': 200,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'ok',\n",
" 'score': 0.9600053,\n",
" 'index': 201,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'ok',\n",
" 'score': 0.98252696,\n",
" 'index': 202,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'ok',\n",
" 'score': 0.9993383,\n",
" 'index': 203,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'ok',\n",
" 'score': 0.8149123,\n",
" 'index': 204,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'ok',\n",
" 'score': 0.928141,\n",
" 'index': 205,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'typo',\n",
" 'score': 0.98144484,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'typo',\n",
" 'score': 0.9430255,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'ok',\n",
" 'score': 0.9964923,\n",
" 'index': 208,\n",
" 'word': 'and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'ok',\n",
" 'score': 0.9620715,\n",
" 'index': 209,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'ok',\n",
" 'score': 0.9969836,\n",
" 'index': 210,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'ok',\n",
" 'score': 0.42969742,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'ok',\n",
" 'score': 0.7296629,\n",
" 'index': 212,\n",
" 'word': 'The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'ok',\n",
" 'score': 0.6601624,\n",
" 'index': 213,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'ok',\n",
" 'score': 0.9987452,\n",
" 'index': 214,\n",
" 'word': 'on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'ok',\n",
" 'score': 0.995214,\n",
" 'index': 215,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'typo',\n",
" 'score': 0.76099324,\n",
" 'index': 216,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'ok',\n",
" 'score': 0.93993735,\n",
" 'index': 217,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'ok',\n",
" 'score': 0.76330084,\n",
" 'index': 218,\n",
" 'word': 'a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'typo',\n",
" 'score': 0.7646893,\n",
" 'index': 219,\n",
" 'word': 'pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'ok',\n",
" 'score': 0.99559265,\n",
" 'index': 220,\n",
" 'word': 'i',\n",
" 'start': 898,\n",
" 'end': 899},\n",
" {'entity': 'ok',\n",
" 'score': 0.99413526,\n",
" 'index': 221,\n",
" 'word': '##con',\n",
" 'start': 899,\n",
" 'end': 902},\n",
" {'entity': 'ok',\n",
" 'score': 0.7378378,\n",
" 'index': 222,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'typo',\n",
" 'score': 0.90965664,\n",
" 'index': 223,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'ok',\n",
" 'score': 0.998401,\n",
" 'index': 224,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'ok',\n",
" 'score': 0.99020493,\n",
" 'index': 225,\n",
" 'word': 'movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'ok',\n",
" 'score': 0.8732322,\n",
" 'index': 226,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'typo',\n",
" 'score': 0.9965013,\n",
" 'index': 227,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'ok',\n",
" 'score': 0.99827015,\n",
" 'index': 228,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'ok',\n",
" 'score': 0.94189245,\n",
" 'index': 229,\n",
" 'word': 'books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'ok',\n",
" 'score': 0.99873203,\n",
" 'index': 230,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'ok',\n",
" 'score': 0.788286,\n",
" 'index': 231,\n",
" 'word': 'magazines',\n",
" 'start': 939,\n",
" 'end': 948},\n",
" {'entity': 'ok',\n",
" 'score': 0.9985929,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'typo',\n",
" 'score': 0.83558345,\n",
" 'index': 233,\n",
" 'word': 'radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'ok',\n",
" 'score': 0.9941037,\n",
" 'index': 234,\n",
" 'word': 'talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'ok',\n",
" 'score': 0.9955338,\n",
" 'index': 235,\n",
" 'word': 'shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'typo',\n",
" 'score': 0.8871708,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'typo',\n",
" 'score': 0.5271952,\n",
" 'index': 237,\n",
" 'word': 'and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'typo',\n",
" 'score': 0.99819934,\n",
" 'index': 238,\n",
" 'word': 'hau',\n",
" 'start': 972,\n",
" 'end': 975},\n",
" {'entity': 'ok',\n",
" 'score': 0.8323784,\n",
" 'index': 239,\n",
" 'word': '##nted',\n",
" 'start': 975,\n",
" 'end': 979},\n",
" {'entity': 'typo',\n",
" 'score': 0.97368705,\n",
" 'index': 240,\n",
" 'word': 'gr',\n",
" 'start': 980,\n",
" 'end': 982},\n",
" {'entity': 'ok',\n",
" 'score': 0.54475385,\n",
" 'index': 241,\n",
" 'word': '##oce',\n",
" 'start': 982,\n",
" 'end': 985},\n",
" {'entity': 'ok',\n",
" 'score': 0.9955764,\n",
" 'index': 242,\n",
" 'word': '##ry',\n",
" 'start': 985,\n",
" 'end': 987},\n",
" {'entity': 'ok',\n",
" 'score': 0.9983236,\n",
" 'index': 243,\n",
" 'word': 'store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'ok',\n",
" 'score': 0.7288064,\n",
" 'index': 244,\n",
" 'word': 'check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'ok',\n",
" 'score': 0.9966523,\n",
" 'index': 245,\n",
" 'word': '##out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'ok',\n",
" 'score': 0.99845624,\n",
" 'index': 246,\n",
" 'word': 'lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'typo',\n",
" 'score': 0.6110894,\n",
" 'index': 247,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'ok',\n",
" 'score': 0.7403333,\n",
" 'index': 248,\n",
" 'word': '25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'ok',\n",
" 'score': 0.90443635,\n",
" 'index': 249,\n",
" 'word': 'years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'typo',\n",
" 'score': 0.7376988,\n",
" 'index': 250,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'typo',\n",
" 'score': 0.7852515,\n",
" 'index': 251,\n",
" 'word': 'Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'ok',\n",
" 'score': 0.76370704,\n",
" 'index': 252,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'typo',\n",
" 'score': 0.62583363,\n",
" 'index': 253,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'ok',\n",
" 'score': 0.5030125,\n",
" 'index': 254,\n",
" 'word': 'the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'typo',\n",
" 'score': 0.720193,\n",
" 'index': 255,\n",
" 'word': 'natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'ok',\n",
" 'score': 0.9988502,\n",
" 'index': 256,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'ok',\n",
" 'score': 0.97257465,\n",
" 'index': 257,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'ok',\n",
" 'score': 0.8562199,\n",
" 'index': 258,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'typo',\n",
" 'score': 0.8256486,\n",
" 'index': 259,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'ok',\n",
" 'score': 0.99360114,\n",
" 'index': 260,\n",
" 'word': 'of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'ok',\n",
" 'score': 0.8346857,\n",
" 'index': 261,\n",
" 'word': 'life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'ok',\n",
" 'score': 0.982291,\n",
" 'index': 262,\n",
" 'word': 'on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'ok',\n",
" 'score': 0.97399634,\n",
" 'index': 263,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'typo',\n",
" 'score': 0.90118784,\n",
" 'index': 264,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'ok',\n",
" 'score': 0.59052175,\n",
" 'index': 265,\n",
" 'word': 'and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'typo',\n",
" 'score': 0.9531032,\n",
" 'index': 266,\n",
" 'word': 'that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'typo',\n",
" 'score': 0.5538794,\n",
" 'index': 267,\n",
" 'word': 'us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'ok',\n",
" 'score': 0.996549,\n",
" 'index': 268,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'typo',\n",
" 'score': 0.7577664,\n",
" 'index': 269,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'ok',\n",
" 'score': 0.94258755,\n",
" 'index': 270,\n",
" 'word': 'to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'typo',\n",
" 'score': 0.901848,\n",
" 'index': 271,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'ok',\n",
" 'score': 0.87200266,\n",
" 'index': 272,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'typo',\n",
" 'score': 0.93054014,\n",
" 'index': 273,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'ok',\n",
" 'score': 0.7310256,\n",
" 'index': 274,\n",
" 'word': 'but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'ok',\n",
" 'score': 0.98565906,\n",
" 'index': 275,\n",
" 'word': 'really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'ok',\n",
" 'score': 0.94660145,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'typo',\n",
" 'score': 0.9548484,\n",
" 'index': 277,\n",
" 'word': 'the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'ok',\n",
" 'score': 0.85286355,\n",
" 'index': 278,\n",
" 'word': 'defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'ok',\n",
" 'score': 0.919405,\n",
" 'index': 279,\n",
" 'word': '##s',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'ok',\n",
" 'score': 0.97006005,\n",
" 'index': 280,\n",
" 'word': 'of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'ok',\n",
" 'score': 0.929363,\n",
" 'index': 281,\n",
" 'word': 'the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'ok',\n",
" 'score': 0.7226242,\n",
" 'index': 282,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'typo',\n",
" 'score': 0.8097878,\n",
" 'index': 283,\n",
" 'word': 'budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'ok',\n",
" 'score': 0.95355994,\n",
" 'index': 284,\n",
" 'word': 'wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'typo',\n",
" 'score': 0.9236313,\n",
" 'index': 285,\n",
" 'word': 'there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'ok',\n",
" 'score': 0.7190513,\n",
" 'index': 286,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'ok',\n",
" 'score': 0.66554,\n",
" 'index': 287,\n",
" 'word': 'ancient',\n",
" 'start': 1196,\n",
" 'end': 1203},\n",
" {'entity': 'ok',\n",
" 'score': 0.60441756,\n",
" 'index': 288,\n",
" 'word': 'civili',\n",
" 'start': 1204,\n",
" 'end': 1210},\n",
" {'entity': 'typo',\n",
" 'score': 0.6450192,\n",
" 'index': 289,\n",
" 'word': '##zation',\n",
" 'start': 1210,\n",
" 'end': 1216},\n",
" {'entity': 'ok',\n",
" 'score': 0.6214468,\n",
" 'index': 290,\n",
" 'word': 'on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'ok',\n",
" 'score': 0.9483295,\n",
" 'index': 291,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'typo',\n",
" 'score': 0.7586299,\n",
" 'index': 292,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'typo',\n",
" 'score': 0.8658028,\n",
" 'index': 293,\n",
" 'word': 'We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'typo',\n",
" 'score': 0.65636414,\n",
" 'index': 294,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'ok',\n",
" 'score': 0.997926,\n",
" 'index': 295,\n",
" 'word': 'to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'typo',\n",
" 'score': 0.5479873,\n",
" 'index': 296,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'ok',\n",
" 'score': 0.99164516,\n",
" 'index': 297,\n",
" 'word': 'another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'ok',\n",
" 'score': 0.9741786,\n",
" 'index': 298,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'typo',\n",
" 'score': 0.98279405,\n",
" 'index': 299,\n",
" 'word': 'just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'ok',\n",
" 'score': 0.86857635,\n",
" 'index': 300,\n",
" 'word': 'to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'ok',\n",
" 'score': 0.5942953,\n",
" 'index': 301,\n",
" 'word': 'make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'ok',\n",
" 'score': 0.9985331,\n",
" 'index': 302,\n",
" 'word': 'sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'typo',\n",
" 'score': 0.7710986,\n",
" 'index': 303,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'ok',\n",
" 'score': 0.6990816,\n",
" 'index': 304,\n",
" 'word': 'were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'ok',\n",
" 'score': 0.8634561,\n",
" 'index': 305,\n",
" 'word': '##n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'ok',\n",
" 'score': 0.99818283,\n",
" 'index': 306,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'ok',\n",
" 'score': 0.99803716,\n",
" 'index': 307,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'ok',\n",
" 'score': 0.9882159,\n",
" 'index': 308,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'typo',\n",
" 'score': 0.88255525,\n",
" 'index': 309,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'typo',\n",
" 'score': 0.9208057,\n",
" 'index': 310,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'typo',\n",
" 'score': 0.91730064,\n",
" 'index': 311,\n",
" 'word': 'April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'ok',\n",
" 'score': 0.5459038,\n",
" 'index': 312,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'ok',\n",
" 'score': 0.5843325,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'typo',\n",
" 'score': 0.8402537,\n",
" 'index': 314,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'typo',\n",
" 'score': 0.9408526,\n",
" 'index': 315,\n",
" 'word': '.',\n",
" 'start': 1310,\n",
" 'end': 1311},\n",
" {'entity': 'typo',\n",
" 'score': 0.9976394,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'ok',\n",
" 'score': 0.96120375,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'ok',\n",
" 'score': 0.923244,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'ok',\n",
" 'score': 0.97935665,\n",
" 'index': 319,\n",
" 'word': 'and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'typo',\n",
" 'score': 0.7523317,\n",
" 'index': 320,\n",
" 'word': 'his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'typo',\n",
" 'score': 0.91937953,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'ok',\n",
" 'score': 0.8556037,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'ok',\n",
" 'score': 0.84279835,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'typo',\n",
" 'score': 0.9986338,\n",
" 'index': 324,\n",
" 'word': 'camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'ok',\n",
" 'score': 0.90062076,\n",
" 'index': 325,\n",
" 'word': 'team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'typo',\n",
" 'score': 0.91712064,\n",
" 'index': 326,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'ok',\n",
" 'score': 0.785382,\n",
" 'index': 327,\n",
" 'word': 'a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'typo',\n",
" 'score': 0.9040174,\n",
" 'index': 328,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'ok',\n",
" 'score': 0.9086188,\n",
" 'index': 329,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'typo',\n",
" 'score': 0.65382206,\n",
" 'index': 330,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'ok',\n",
" 'score': 0.95172024,\n",
" 'index': 331,\n",
" 'word': 'ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'ok',\n",
" 'score': 0.9873766,\n",
" 'index': 332,\n",
" 'word': 'times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'ok',\n",
" 'score': 0.67130685,\n",
" 'index': 333,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'ok',\n",
" 'score': 0.9719016,\n",
" 'index': 334,\n",
" 'word': '##er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'ok',\n",
" 'score': 0.99000204,\n",
" 'index': 335,\n",
" 'word': 'than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'ok',\n",
" 'score': 0.93401396,\n",
" 'index': 336,\n",
" 'word': 'the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'ok',\n",
" 'score': 0.77596074,\n",
" 'index': 337,\n",
" 'word': 'original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'typo',\n",
" 'score': 0.7519366,\n",
" 'index': 338,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'ok',\n",
" 'score': 0.66892004,\n",
" 'index': 339,\n",
" 'word': 'photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'typo',\n",
" 'score': 0.98212206,\n",
" 'index': 340,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'typo',\n",
" 'score': 0.99039525,\n",
" 'index': 341,\n",
" 'word': 'reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'ok',\n",
" 'score': 0.53409153,\n",
" 'index': 342,\n",
" 'word': '##ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'typo',\n",
" 'score': 0.86396545,\n",
" 'index': 343,\n",
" 'word': 'a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'typo',\n",
" 'score': 0.6633433,\n",
" 'index': 344,\n",
" 'word': 'natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'ok',\n",
" 'score': 0.9973527,\n",
" 'index': 345,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'ok',\n",
" 'score': 0.8840458,\n",
" 'index': 346,\n",
" 'word': '##form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'typo',\n",
" 'score': 0.9950237,\n",
" 'index': 347,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'typo',\n",
" 'score': 0.9553996,\n",
" 'index': 348,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'typo',\n",
" 'score': 0.9745846,\n",
" 'index': 349,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'typo',\n",
" 'score': 0.7203086,\n",
" 'index': 350,\n",
" 'word': 'no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'ok',\n",
" 'score': 0.91405845,\n",
" 'index': 351,\n",
" 'word': 'alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'ok',\n",
" 'score': 0.99879634,\n",
" 'index': 352,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'typo',\n",
" 'score': 0.56567025,\n",
" 'index': 353,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'typo',\n",
" 'score': 0.971668,\n",
" 'index': 354,\n",
" 'word': '\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'ok',\n",
" 'score': 0.9891775,\n",
" 'index': 355,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'ok',\n",
" 'score': 0.9991333,\n",
" 'index': 356,\n",
" 'word': 'that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'ok',\n",
" 'score': 0.9964754,\n",
" 'index': 357,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'ok',\n",
" 'score': 0.89596707,\n",
" 'index': 358,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'ok',\n",
" 'score': 0.986717,\n",
" 'index': 359,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'ok',\n",
" 'score': 0.98859054,\n",
" 'index': 360,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'ok',\n",
" 'score': 0.9508471,\n",
" 'index': 361,\n",
" 'word': 'very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'ok',\n",
" 'score': 0.9967194,\n",
" 'index': 362,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'ok',\n",
" 'score': 0.69285977,\n",
" 'index': 363,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'ok',\n",
" 'score': 0.9915514,\n",
" 'index': 364,\n",
" 'word': 'all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'ok',\n",
" 'score': 0.5899336,\n",
" 'index': 365,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'ok',\n",
" 'score': 0.5996167,\n",
" 'index': 366,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'typo',\n",
" 'score': 0.8670836,\n",
" 'index': 367,\n",
" 'word': 'could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'ok',\n",
" 'score': 0.9761153,\n",
" 'index': 368,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'typo',\n",
" 'score': 0.7312487,\n",
" 'index': 369,\n",
" 'word': 'alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'ok',\n",
" 'score': 0.8062085,\n",
" 'index': 370,\n",
" 'word': 'marking',\n",
" 'start': 1562,\n",
" 'end': 1569},\n",
" {'entity': 'typo',\n",
" 'score': 0.80552965,\n",
" 'index': 371,\n",
" 'word': '##s',\n",
" 'start': 1569,\n",
" 'end': 1570},\n",
" {'entity': 'typo',\n",
" 'score': 0.97620255,\n",
" 'index': 372,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'typo',\n",
" 'score': 0.91833067,\n",
" 'index': 373,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'typo',\n",
" 'score': 0.8934933,\n",
" 'index': 374,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'typo',\n",
" 'score': 0.9554798,\n",
" 'index': 375,\n",
" 'word': 'ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': 'ok',\n",
" 'score': 0.6562142,\n",
" 'index': 376,\n",
" 'word': '##ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'ok',\n",
" 'score': 0.64348394,\n",
" 'index': 377,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'ok',\n",
" 'score': 0.949757,\n",
" 'index': 378,\n",
" 'word': 'Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'ok',\n",
" 'score': 0.99965537,\n",
" 'index': 379,\n",
" 'word': 'no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'ok',\n",
" 'score': 0.9982284,\n",
" 'index': 380,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'typo',\n",
" 'score': 0.7986995,\n",
" 'index': 381,\n",
" 'word': 'ye',\n",
" 'start': 1601,\n",
" 'end': 1603},\n",
" {'entity': 'ok',\n",
" 'score': 0.9877543,\n",
" 'index': 382,\n",
" 'word': '##s',\n",
" 'start': 1603,\n",
" 'end': 1604},\n",
" {'entity': 'ok',\n",
" 'score': 0.9989477,\n",
" 'index': 383,\n",
" 'word': 'that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'typo',\n",
" 'score': 0.8178804,\n",
" 'index': 384,\n",
" 'word': 'rum',\n",
" 'start': 1610,\n",
" 'end': 1613},\n",
" {'entity': 'ok',\n",
" 'score': 0.9868613,\n",
" 'index': 385,\n",
" 'word': '##or',\n",
" 'start': 1613,\n",
" 'end': 1615},\n",
" {'entity': 'ok',\n",
" 'score': 0.90991825,\n",
" 'index': 386,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'typo',\n",
" 'score': 0.5242705,\n",
" 'index': 387,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'ok',\n",
" 'score': 0.9543154,\n",
" 'index': 388,\n",
" 'word': 'but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'ok',\n",
" 'score': 0.86853844,\n",
" 'index': 389,\n",
" 'word': 'to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'ok',\n",
" 'score': 0.7432325,\n",
" 'index': 390,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'ok',\n",
" 'score': 0.9467291,\n",
" 'index': 391,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'typo',\n",
" 'score': 0.7128569,\n",
" 'index': 392,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'ok',\n",
" 'score': 0.62498444,\n",
" 'index': 393,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'typo',\n",
" 'score': 0.6839772,\n",
" 'index': 394,\n",
" 'word': 'April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'ok',\n",
" 'score': 0.68691665,\n",
" 'index': 395,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'ok',\n",
" 'score': 0.84025437,\n",
" 'index': 396,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'typo',\n",
" 'score': 0.9157925,\n",
" 'index': 397,\n",
" 'word': '2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'typo',\n",
" 'score': 0.7844509,\n",
" 'index': 398,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'typo',\n",
" 'score': 0.5589368,\n",
" 'index': 399,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'ok',\n",
" 'score': 0.99429286,\n",
" 'index': 400,\n",
" 'word': 'to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'typo',\n",
" 'score': 0.84421164,\n",
" 'index': 401,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'ok',\n",
" 'score': 0.978843,\n",
" 'index': 402,\n",
" 'word': 'another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'ok',\n",
" 'score': 0.59987646,\n",
" 'index': 403,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'typo',\n",
" 'score': 0.7050702,\n",
" 'index': 404,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'ok',\n",
" 'score': 0.95890915,\n",
" 'index': 405,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'ok',\n",
" 'score': 0.99975187,\n",
" 'index': 406,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'ok',\n",
" 'score': 0.98025346,\n",
" 'index': 407,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'ok',\n",
" 'score': 0.83194023,\n",
" 'index': 408,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'typo',\n",
" 'score': 0.9467474,\n",
" 'index': 409,\n",
" 'word': 'a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'typo',\n",
" 'score': 0.91021883,\n",
" 'index': 410,\n",
" 'word': 'cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'ok',\n",
" 'score': 0.9859971,\n",
" 'index': 411,\n",
" 'word': '##less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'typo',\n",
" 'score': 0.98307884,\n",
" 'index': 412,\n",
" 'word': 'summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'ok',\n",
" 'score': 0.994193,\n",
" 'index': 413,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'typo',\n",
" 'score': 0.4313977,\n",
" 'index': 414,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'typo',\n",
" 'score': 0.96405447,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'ok',\n",
" 'score': 0.6155722,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'ok',\n",
" 'score': 0.9290005,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'ok',\n",
" 'score': 0.90629065,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'ok',\n",
" 'score': 0.868706,\n",
" 'index': 419,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'ok',\n",
" 'score': 0.8017526,\n",
" 'index': 420,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'ok',\n",
" 'score': 0.7134167,\n",
" 'index': 421,\n",
" 'word': 'an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'typo',\n",
" 'score': 0.5621726,\n",
" 'index': 422,\n",
" 'word': 'ama',\n",
" 'start': 1770,\n",
" 'end': 1773},\n",
" {'entity': 'ok',\n",
" 'score': 0.7826029,\n",
" 'index': 423,\n",
" 'word': '##zing',\n",
" 'start': 1773,\n",
" 'end': 1777},\n",
" {'entity': 'ok',\n",
" 'score': 0.99448264,\n",
" 'index': 424,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'ok',\n",
" 'score': 0.73524755,\n",
" 'index': 425,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'ok',\n",
" 'score': 0.6591615,\n",
" 'index': 426,\n",
" 'word': 'the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'typo',\n",
" 'score': 0.86533505,\n",
" 'index': 427,\n",
" 'word': 'camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'ok',\n",
" 'score': 0.90541,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'ok',\n",
" 'score': 0.9281528,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'typo',\n",
" 'score': 0.8930355,\n",
" 'index': 430,\n",
" 'word': 'absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'ok',\n",
" 'score': 0.9972451,\n",
" 'index': 431,\n",
" 'word': 'maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'ok',\n",
" 'score': 0.999348,\n",
" 'index': 432,\n",
" 'word': 'revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'typo',\n",
" 'score': 0.6555487,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'ok',\n",
" 'score': 0.5426304,\n",
" 'index': 434,\n",
" 'word': 'With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'ok',\n",
" 'score': 0.9795935,\n",
" 'index': 435,\n",
" 'word': 'this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'ok',\n",
" 'score': 0.9408998,\n",
" 'index': 436,\n",
" 'word': 'camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'typo',\n",
" 'score': 0.9998209,\n",
" 'index': 437,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'ok',\n",
" 'score': 0.9576567,\n",
" 'index': 438,\n",
" 'word': 'can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'typo',\n",
" 'score': 0.573261,\n",
" 'index': 439,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'ok',\n",
" 'score': 0.9691068,\n",
" 'index': 440,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'ok',\n",
" 'score': 0.8217143,\n",
" 'index': 441,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'typo',\n",
" 'score': 0.82698673,\n",
" 'index': 442,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'typo',\n",
" 'score': 0.78578687,\n",
" 'index': 443,\n",
" 'word': 'a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'typo',\n",
" 'score': 0.74662477,\n",
" 'index': 444,\n",
" 'word': 'digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'ok',\n",
" 'score': 0.5583283,\n",
" 'index': 445,\n",
" 'word': 'image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'typo',\n",
" 'score': 0.99453664,\n",
" 'index': 446,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'typo',\n",
" 'score': 0.99513924,\n",
" 'index': 447,\n",
" 'word': '3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'ok',\n",
" 'score': 0.97854996,\n",
" 'index': 448,\n",
" 'word': 'times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'ok',\n",
" 'score': 0.9518669,\n",
" 'index': 449,\n",
" 'word': 'bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'ok',\n",
" 'score': 0.9959552,\n",
" 'index': 450,\n",
" 'word': 'than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'ok',\n",
" 'score': 0.70804185,\n",
" 'index': 451,\n",
" 'word': 'the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'ok',\n",
" 'score': 0.7743477,\n",
" 'index': 452,\n",
" 'word': 'pi',\n",
" 'start': 1916,\n",
" 'end': 1918},\n",
" {'entity': 'ok',\n",
" 'score': 0.8590469,\n",
" 'index': 453,\n",
" 'word': '##xel',\n",
" 'start': 1918,\n",
" 'end': 1921},\n",
" {'entity': 'typo',\n",
" 'score': 0.88074666,\n",
" 'index': 454,\n",
" 'word': 'size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'typo',\n",
" 'score': 0.977126,\n",
" 'index': 455,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'typo',\n",
" 'score': 0.71839124,\n",
" 'index': 456,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'ok',\n",
" 'score': 0.7110502,\n",
" 'index': 457,\n",
" 'word': 'if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'ok',\n",
" 'score': 0.72081035,\n",
" 'index': 458,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'ok',\n",
" 'score': 0.90700245,\n",
" 'index': 459,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'ok',\n",
" 'score': 0.9401142,\n",
" 'index': 460,\n",
" 'word': 'any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'ok',\n",
" 'score': 0.8010327,\n",
" 'index': 461,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'ok',\n",
" 'score': 0.9774201,\n",
" 'index': 462,\n",
" 'word': 'of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'ok',\n",
" 'score': 0.72982603,\n",
" 'index': 463,\n",
" 'word': 'life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'typo',\n",
" 'score': 0.98334414,\n",
" 'index': 464,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'typo',\n",
" 'score': 0.9922644,\n",
" 'index': 465,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'ok',\n",
" 'score': 0.9828555,\n",
" 'index': 466,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'ok',\n",
" 'score': 0.9977181,\n",
" 'index': 467,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'ok',\n",
" 'score': 0.99048984,\n",
" 'index': 468,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'ok',\n",
" 'score': 0.99270344,\n",
" 'index': 469,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'ok',\n",
" 'score': 0.7724791,\n",
" 'index': 470,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'ok',\n",
" 'score': 0.9829512,\n",
" 'index': 471,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'typo',\n",
" 'score': 0.9201129,\n",
" 'index': 472,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'typo',\n",
" 'score': 0.51953334,\n",
" 'index': 473,\n",
" 'word': 'What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'ok',\n",
" 'score': 0.9992943,\n",
" 'index': 474,\n",
" 'word': 'the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'ok',\n",
" 'score': 0.98957855,\n",
" 'index': 475,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'typo',\n",
" 'score': 0.58303297,\n",
" 'index': 476,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'typo',\n",
" 'score': 0.8085752,\n",
" 'index': 477,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'typo',\n",
" 'score': 0.9929074,\n",
" 'index': 478,\n",
" 'word': 'the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'typo',\n",
" 'score': 0.9818252,\n",
" 'index': 479,\n",
" 'word': 'but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'ok',\n",
" 'score': 0.5864657,\n",
" 'index': 480,\n",
" 'word': '##te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'ok',\n",
" 'score': 0.9995679,\n",
" 'index': 481,\n",
" 'word': 'or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'ok',\n",
" 'score': 0.8257528,\n",
" 'index': 482,\n",
" 'word': 'mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'typo',\n",
" 'score': 0.78061324,\n",
" 'index': 483,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'typo',\n",
" 'score': 0.529569,\n",
" 'index': 484,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'typo',\n",
" 'score': 0.98165315,\n",
" 'index': 485,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'ok',\n",
" 'score': 0.7917957,\n",
" 'index': 486,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'ok',\n",
" 'score': 0.8484906,\n",
" 'index': 487,\n",
" 'word': '##form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'typo',\n",
" 'score': 0.5294084,\n",
" 'index': 488,\n",
" 'word': '##s',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'typo',\n",
" 'score': 0.66130173,\n",
" 'index': 489,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'typo',\n",
" 'score': 0.79395646,\n",
" 'index': 490,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'typo',\n",
" 'score': 0.80247337,\n",
" 'index': 491,\n",
" 'word': 'the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'typo',\n",
" 'score': 0.90861714,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'ok',\n",
" 'score': 0.916328,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"\n",
"typo_checker = pipeline(\n",
" \"ner\",\n",
" model=\"mrm8488/distilbert-base-multi-cased-finetuned-typo-detection\",\n",
" tokenizer=\"mrm8488/distilbert-base-multi-cased-finetuned-typo-detection\"\n",
")\n",
"\n",
"result = typo_checker(text)\n",
"result[1:-1]\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "7b8311a5-68cc-45cb-b2a8-94495aa18113",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"ok 328\n",
"typo 164\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"ok 7\n",
" ##adow 3\n",
" ##ara 1\n",
" ##biter 1\n",
" ##con 1\n",
" ..\n",
"typo when 1\n",
" which 4\n",
" wrong 1\n",
" ye 1\n",
" you 3\n",
"Length: 298, dtype: int64"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"5 metricas mrm8488.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "25e685d8-ff8a-43ec-8ab0-958a89056ee8",
"metadata": {},
"source": [
"## 6 sagorsarker/codeswitch-spaeng-ner-lince "
]
},
{
"cell_type": "code",
"execution_count": 128,
"id": "5e5dcd1c-f49d-486d-9d72-e0ce4498a565",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at sagorsarker/codeswitch-spaeng-ner-lince were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n",
"- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.90762955,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.45779723,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.67780584,\n",
" 'index': 37,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'B-PROD',\n",
" 'score': 0.823176,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-PROD',\n",
" 'score': 0.6758249,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'B-OTHER',\n",
" 'score': 0.5148923,\n",
" 'index': 98,\n",
" 'word': 'Mart',\n",
" 'start': 407,\n",
" 'end': 411},\n",
" {'entity': 'B-OTHER',\n",
" 'score': 0.42019445,\n",
" 'index': 99,\n",
" 'word': '##ian',\n",
" 'start': 411,\n",
" 'end': 414},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.76582533,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.74930793,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.34308487,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-OTHER',\n",
" 'score': 0.60952294,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'I-OTHER',\n",
" 'score': 0.6259159,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9731985,\n",
" 'index': 194,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.62910813,\n",
" 'index': 205,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6685378,\n",
" 'index': 215,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.71298903,\n",
" 'index': 263,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9437394,\n",
" 'index': 282,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.60219324,\n",
" 'index': 291,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'B-TIME',\n",
" 'score': 0.9826582,\n",
" 'index': 311,\n",
" 'word': 'April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'I-TIME',\n",
" 'score': 0.9323657,\n",
" 'index': 312,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'I-TIME',\n",
" 'score': 0.8260366,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'I-TIME',\n",
" 'score': 0.887798,\n",
" 'index': 314,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9961196,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99166673,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9911315,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'B-PROD',\n",
" 'score': 0.6550399,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-PROD',\n",
" 'score': 0.80179524,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-PROD',\n",
" 'score': 0.8049131,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'B-OTHER',\n",
" 'score': 0.35082105,\n",
" 'index': 338,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'B-TIME',\n",
" 'score': 0.97395533,\n",
" 'index': 394,\n",
" 'word': 'April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'I-TIME',\n",
" 'score': 0.66215277,\n",
" 'index': 395,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'B-GROUP',\n",
" 'score': 0.5102115,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'B-GROUP',\n",
" 'score': 0.34341586,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.8453891,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.663866,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"sagorsarker/codeswitch-spaeng-ner-lince\")\n",
"\n",
"model = AutoModelForTokenClassification.from_pretrained(\"sagorsarker/codeswitch-spaeng-ner-lince\")\n",
"\n",
"ner_model = pipeline('ner', model=model, tokenizer=tokenizer)\n",
"\n",
"ner_model(text)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "58c5a81d-c1c6-48db-b26d-edd1e01d1231",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-GROUP 2\n",
"B-LOC 9\n",
"B-ORG 3\n",
"B-OTHER 4\n",
"B-PER 1\n",
"B-PROD 2\n",
"B-TIME 2\n",
"I-LOC 2\n",
"I-OTHER 1\n",
"I-PER 2\n",
"I-PROD 3\n",
"I-TIME 4\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-GROUP ##n 1\n",
" Mali 1\n",
"B-LOC ##yd 1\n",
" American 1\n",
" C 1\n",
" Mars 6\n",
"B-ORG NASA 3\n",
"B-OTHER ##ian 1\n",
" Egypt 1\n",
" Mart 1\n",
" Viking 1\n",
"B-PER Michael 1\n",
"B-PROD Mars 1\n",
" Viking 1\n",
"B-TIME April 2\n",
"I-LOC ##onia 1\n",
" West 1\n",
"I-OTHER ##ion 1\n",
"I-PER ##n 1\n",
" Mali 1\n",
"I-PROD ##biter 1\n",
" 1 1\n",
" Or 1\n",
"I-TIME , 1\n",
" 1998 1\n",
" 5 1\n",
" 8 1\n",
"dtype: int64"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"6 sagorsarkercodeswitch-spaeng-ner-lince.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "9c02ef71-62d9-40f1-98a8-138419cff895",
"metadata": {},
"source": [
"## 7 gunghio/xlm-roberta-base-finetuned-panx-ner"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "a8df4657-5842-43c0-97b8-15346f5a4578",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity_group': 'ORG', 'score': 0.9839335, 'word': 'NASA', 'start': 16, 'end': 20}, {'entity_group': 'ORG', 'score': 0.9257912, 'word': 'FaceOnMars,', 'start': 88, 'end': 101}, {'entity_group': 'LOC', 'score': 0.8524113, 'word': 'Mars,', 'start': 152, 'end': 157}, {'entity_group': 'ORG', 'score': 0.9470773, 'word': 'Viking1spacecraft', 'start': 240, 'end': 259}, {'entity_group': 'ORG', 'score': 0.6181985, 'word': 'Martian', 'start': 407, 'end': 414}, {'entity_group': 'LOC', 'score': 0.49310815, 'word': 'mesa,', 'start': 415, 'end': 420}, {'entity_group': 'LOC', 'score': 0.8700732, 'word': 'Cydonia,', 'start': 435, 'end': 443}, {'entity_group': 'ORG', 'score': 0.57100993, 'word': 'EgyptionPharaoh.', 'start': 496, 'end': 513}, {'entity_group': 'LOC', 'score': 0.46428245, 'word': 'formation', 'start': 622, 'end': 631}, {'entity_group': 'ORG', 'score': 0.90882486, 'word': \"NASA's\", 'start': 801, 'end': 807}, {'entity_group': 'LOC', 'score': 0.5685518, 'word': 'Mars--', 'start': 843, 'end': 849}, {'entity_group': 'LOC', 'score': 0.89254224, 'word': 'Mars', 'start': 875, 'end': 879}, {'entity_group': 'LOC', 'score': 0.8768112, 'word': 'Mars,', 'start': 1088, 'end': 1093}, {'entity_group': 'ORG', 'score': 0.88662714, 'word': 'NASAbudget', 'start': 1169, 'end': 1180}, {'entity_group': 'ORG', 'score': 0.5386654, 'word': 'ancientcivilization', 'start': 1196, 'end': 1216}, {'entity_group': 'LOC', 'score': 0.6887058, 'word': 'Mars.', 'start': 1220, 'end': 1225}, {'entity_group': 'PER', 'score': 0.86852753, 'word': 'MichaelMalin', 'start': 1312, 'end': 1325}, {'entity_group': 'ORG', 'score': 0.9244041, 'word': 'MarsOrbitercamerateam', 'start': 1334, 'end': 1358}, {'entity_group': 'ORG', 'score': 0.65044224, 'word': 'Vikingphotos,', 'start': 1419, 'end': 1433}, {'entity_group': 'ORG', 'score': 0.6091294, 'word': 'digitalimage,', 'start': 1877, 'end': 1891}, {'entity_group': 'LOC', 'score': 0.93108547, 'word': 'AmericanWest.', 'start': 2094, 'end': 2108}]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\transformers\\pipelines\\token_classification.py:392: UserWarning: Tokenizer does not support real words, using fallback heuristic\n",
" warnings.warn(\n"
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"gunghio/xlm-roberta-base-finetuned-panx-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"gunghio/xlm-roberta-base-finetuned-panx-ner\")\n",
"\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer, aggregation_strategy=\"first\")\n",
"example = text\n",
"\n",
"ner_results = nlp(example)\n",
"print(ner_results)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "15eb816d-ac29-448d-8520-fd3cd2d3d957",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity_group\n",
"LOC 9\n",
"ORG 11\n",
"PER 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity_group word \n",
"LOC AmericanWest. 1\n",
" Cydonia, 1\n",
" Mars 1\n",
" Mars, 2\n",
" Mars-- 1\n",
" Mars. 1\n",
" formation 1\n",
" mesa, 1\n",
"ORG EgyptionPharaoh. 1\n",
" FaceOnMars, 1\n",
" MarsOrbitercamerateam 1\n",
" Martian 1\n",
" NASA 1\n",
" NASA's 1\n",
" NASAbudget 1\n",
" Viking1spacecraft 1\n",
" Vikingphotos, 1\n",
" ancientcivilization 1\n",
" digitalimage, 1\n",
"PER MichaelMalin 1\n",
"dtype: int64"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"7 gunghioxlm-roberta-base-finetuned-panx-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity_group']).size())\n",
"aux.groupby(['entity_group', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "7a419ca8-e000-4bf0-81fa-de8970e98a22",
"metadata": {},
"source": [
"## 8 51la5/roberta-large-NER"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "92d64393-7a73-4803-9fa5-043de160cd45",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"Some weights of the model checkpoint at 51la5/roberta-large-NER were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
"- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'I-ORG',\n",
" 'score': 0.9999913,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9999641,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.99989665,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.97350365,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9999362,\n",
" 'index': 36,\n",
" 'word': '▁Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9992086,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9989502,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.999977,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.99619055,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9999354,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.99994576,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.99992585,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9999789,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9614088,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.99997246,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.99979633,\n",
" 'index': 205,\n",
" 'word': '▁Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9998061,\n",
" 'index': 215,\n",
" 'word': '▁Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.99984956,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.99996305,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9998203,\n",
" 'index': 295,\n",
" 'word': '▁Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9999932,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99999106,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.94105357,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9839579,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9913346,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9759228,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.999749,\n",
" 'index': 341,\n",
" 'word': '▁Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9999914,\n",
" 'index': 416,\n",
" 'word': '▁Malin',\n",
" 'start': 1745,\n",
" 'end': 1750},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.92417294,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.99954396,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"51la5/roberta-large-NER\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"51la5/roberta-large-NER\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "9cf28bd0-f78c-48f1-98f0-e6b8dea07e21",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"I-LOC 9\n",
"I-MISC 15\n",
"I-ORG 3\n",
"I-PER 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"I-LOC do 1\n",
" nia 1\n",
" ▁Cy 1\n",
" ▁Mars 5\n",
" ▁West 1\n",
"I-MISC an 1\n",
" bit 1\n",
" er 1\n",
" ion 1\n",
" ▁1 1\n",
" ▁American 1\n",
" ▁Egypt 1\n",
" ▁Face 1\n",
" ▁Mars 2\n",
" ▁Marti 1\n",
" ▁On 1\n",
" ▁Or 1\n",
" ▁Viking 2\n",
"I-ORG ▁NASA 3\n",
"I-PER ▁Malin 2\n",
" ▁Michael 1\n",
"dtype: int64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"8 51la5roberta-large-NER.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "c6a46082-0ffa-413c-ae69-4ffbf2081e71",
"metadata": {},
"source": [
"## 9 dmargutierrezdistilbert-base-multilingual-cased-mapa_coarse-ner"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "e832ae0c-3ecd-4080-aeee-70b0fac8db0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ADDRESS',\n",
" 'score': 0.89953065,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'I-ADDRESS',\n",
" 'score': 0.776557,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'I-ADDRESS',\n",
" 'score': 0.87639356,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-AMOUNT',\n",
" 'score': 0.94337094,\n",
" 'index': 248,\n",
" 'word': '25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'I-AMOUNT',\n",
" 'score': 0.83502764,\n",
" 'index': 249,\n",
" 'word': 'years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'B-DATE',\n",
" 'score': 0.993107,\n",
" 'index': 311,\n",
" 'word': 'April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.9911287,\n",
" 'index': 312,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.9842742,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.9907127,\n",
" 'index': 314,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'B-PERSON',\n",
" 'score': 0.9272426,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PERSON',\n",
" 'score': 0.9829417,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PERSON',\n",
" 'score': 0.96556324,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'B-ORGANISATION',\n",
" 'score': 0.92684674,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-ORGANISATION',\n",
" 'score': 0.9111312,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-ORGANISATION',\n",
" 'score': 0.8044608,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'B-DATE',\n",
" 'score': 0.9925527,\n",
" 'index': 394,\n",
" 'word': 'April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.989077,\n",
" 'index': 395,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.9804143,\n",
" 'index': 396,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'I-DATE',\n",
" 'score': 0.989312,\n",
" 'index': 397,\n",
" 'word': '2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'B-PERSON',\n",
" 'score': 0.6016297,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'I-PERSON',\n",
" 'score': 0.7576901,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'B-ADDRESS',\n",
" 'score': 0.48142406,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-ADDRESS',\n",
" 'score': 0.456201,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"dmargutierrez/distilbert-base-multilingual-cased-mapa_coarse-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"dmargutierrez/distilbert-base-multilingual-cased-mapa_coarse-ner\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "3464c046-a851-4cb6-a7fe-5a71e03b2c70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-ADDRESS 2\n",
"B-AMOUNT 1\n",
"B-DATE 2\n",
"B-ORGANISATION 1\n",
"B-PERSON 2\n",
"I-ADDRESS 3\n",
"I-AMOUNT 1\n",
"I-DATE 6\n",
"I-ORGANISATION 2\n",
"I-PERSON 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-ADDRESS American 1\n",
" C 1\n",
"B-AMOUNT 25 1\n",
"B-DATE April 2\n",
"B-ORGANISATION Mars 1\n",
"B-PERSON Mali 1\n",
" Michael 1\n",
"I-ADDRESS ##onia 1\n",
" ##yd 1\n",
" West 1\n",
"I-AMOUNT years 1\n",
"I-DATE , 2\n",
" 1998 1\n",
" 2001 1\n",
" 5 1\n",
" 8 1\n",
"I-ORGANISATION ##biter 1\n",
" Or 1\n",
"I-PERSON ##n 2\n",
" Mali 1\n",
"dtype: int64"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"9 dmargutierrezdistilbert-base-multilingual-cased-mapa_coarse-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "d464a1a6-7472-4961-907c-a58292fb42b2",
"metadata": {},
"source": [
"## 10 mbrutonspa_enpt_mBERT"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "038960d7-d348-4025-b336-acd0b7a6827b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'r0:arg1|tem',\n",
" 'score': 0.8560656,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'r0:root',\n",
" 'score': 0.99593973,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'r1:arg1|tem',\n",
" 'score': 0.7426193,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'r1:root',\n",
" 'score': 0.94838774,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'r1:arg2|atr',\n",
" 'score': 0.64391094,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'r2:root',\n",
" 'score': 0.80442923,\n",
" 'index': 16,\n",
" 'word': 'tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'r1:arg1|pat',\n",
" 'score': 0.33438408,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'r4:arg1|tem',\n",
" 'score': 0.34810358,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'r5:root',\n",
" 'score': 0.5292147,\n",
" 'index': 30,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'r4:arg2|atr',\n",
" 'score': 0.46503437,\n",
" 'index': 31,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'r6:arg2|atr',\n",
" 'score': 0.07937594,\n",
" 'index': 33,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'r6:root',\n",
" 'score': 0.55574423,\n",
" 'index': 34,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'r5:arg1|tem',\n",
" 'score': 0.38110724,\n",
" 'index': 35,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.16291595,\n",
" 'index': 42,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.6100769,\n",
" 'index': 44,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'r7:arg0|agt',\n",
" 'score': 0.15812016,\n",
" 'index': 45,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'r6:arg0|agt',\n",
" 'score': 0.18646379,\n",
" 'index': 62,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.6611128,\n",
" 'index': 64,\n",
" 'word': 'ci',\n",
" 'start': 264,\n",
" 'end': 266},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.5775215,\n",
" 'index': 65,\n",
" 'word': '##rc',\n",
" 'start': 266,\n",
" 'end': 268},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.35779056,\n",
" 'index': 66,\n",
" 'word': '##ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.39385104,\n",
" 'index': 68,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.42999077,\n",
" 'index': 70,\n",
" 'word': 'sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'r8:root',\n",
" 'score': 0.26388708,\n",
" 'index': 71,\n",
" 'word': '##pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.2658881,\n",
" 'index': 72,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.19982801,\n",
" 'index': 75,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'r8:root',\n",
" 'score': 0.3022762,\n",
" 'index': 76,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.08930407,\n",
" 'index': 81,\n",
" 'word': 'like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.09168834,\n",
" 'index': 82,\n",
" 'word': '##ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.054486226,\n",
" 'index': 88,\n",
" 'word': 'Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.10709534,\n",
" 'index': 89,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.18592234,\n",
" 'index': 90,\n",
" 'word': 'figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.15209025,\n",
" 'index': 91,\n",
" 'word': '##d',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.045862645,\n",
" 'index': 94,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.115167715,\n",
" 'index': 95,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04374532,\n",
" 'index': 102,\n",
" 'word': 'common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.040294692,\n",
" 'index': 110,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.1052284,\n",
" 'index': 111,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04526653,\n",
" 'index': 112,\n",
" 'word': 'sh',\n",
" 'start': 462,\n",
" 'end': 464},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.050974954,\n",
" 'index': 113,\n",
" 'word': '##adow',\n",
" 'start': 464,\n",
" 'end': 468},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.04716369,\n",
" 'index': 115,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.12286918,\n",
" 'index': 116,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.034002524,\n",
" 'index': 117,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.085215025,\n",
" 'index': 118,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04240514,\n",
" 'index': 119,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04686627,\n",
" 'index': 129,\n",
" 'word': 'days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.040811997,\n",
" 'index': 130,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.040206842,\n",
" 'index': 132,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.080649935,\n",
" 'index': 133,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.042861167,\n",
" 'index': 135,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.042270634,\n",
" 'index': 136,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08127655,\n",
" 'index': 139,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.044283886,\n",
" 'index': 142,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08544878,\n",
" 'index': 143,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.040639073,\n",
" 'index': 144,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0474719,\n",
" 'index': 148,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.078273796,\n",
" 'index': 149,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.060446404,\n",
" 'index': 153,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.030689918,\n",
" 'index': 154,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08887379,\n",
" 'index': 156,\n",
" 'word': 'res',\n",
" 'start': 642,\n",
" 'end': 645},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07661998,\n",
" 'index': 157,\n",
" 'word': '##emble',\n",
" 'start': 645,\n",
" 'end': 650},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0793471,\n",
" 'index': 158,\n",
" 'word': '##d',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.034773763,\n",
" 'index': 161,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.034561444,\n",
" 'index': 166,\n",
" 'word': 'all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.038585175,\n",
" 'index': 168,\n",
" 'word': 'it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.087780945,\n",
" 'index': 169,\n",
" 'word': 'was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0999068,\n",
" 'index': 170,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.027370188,\n",
" 'index': 171,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.078722976,\n",
" 'index': 176,\n",
" 'word': 'We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.12920254,\n",
" 'index': 178,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.07986989,\n",
" 'index': 179,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.048153095,\n",
" 'index': 181,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.10950655,\n",
" 'index': 182,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.041486118,\n",
" 'index': 183,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.05073052,\n",
" 'index': 184,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.10624526,\n",
" 'index': 185,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.054641366,\n",
" 'index': 188,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.10240572,\n",
" 'index': 190,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.044021852,\n",
" 'index': 192,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.090464294,\n",
" 'index': 200,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.09293412,\n",
" 'index': 201,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.09475694,\n",
" 'index': 202,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.056167223,\n",
" 'index': 203,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.03035166,\n",
" 'index': 204,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.032432236,\n",
" 'index': 209,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.083309464,\n",
" 'index': 210,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.038414363,\n",
" 'index': 213,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.02903995,\n",
" 'index': 216,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07692599,\n",
" 'index': 217,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.051249504,\n",
" 'index': 219,\n",
" 'word': 'pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04843377,\n",
" 'index': 220,\n",
" 'word': 'i',\n",
" 'start': 898,\n",
" 'end': 899},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04545699,\n",
" 'index': 221,\n",
" 'word': '##con',\n",
" 'start': 899,\n",
" 'end': 902},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08494211,\n",
" 'index': 223,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.03157478,\n",
" 'index': 224,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.1036089,\n",
" 'index': 227,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.03158019,\n",
" 'index': 228,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'r8:argM|tmp',\n",
" 'score': 0.091016516,\n",
" 'index': 247,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.06676676,\n",
" 'index': 252,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'r8:root',\n",
" 'score': 0.20488475,\n",
" 'index': 253,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'r7:arg1|tem',\n",
" 'score': 0.36603826,\n",
" 'index': 256,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'r7:arg1|tem',\n",
" 'score': 0.31067976,\n",
" 'index': 257,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.48778686,\n",
" 'index': 258,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'r7:arg2|atr',\n",
" 'score': 0.13593948,\n",
" 'index': 259,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'r6:arg0|agt',\n",
" 'score': 0.21349978,\n",
" 'index': 268,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.48215854,\n",
" 'index': 269,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.66229856,\n",
" 'index': 271,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.34266004,\n",
" 'index': 272,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'r6:arg0|agt',\n",
" 'score': 0.27256963,\n",
" 'index': 278,\n",
" 'word': 'defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'r5:root',\n",
" 'score': 0.4016244,\n",
" 'index': 286,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'r6:arg0|agt',\n",
" 'score': 0.3085603,\n",
" 'index': 293,\n",
" 'word': 'We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.68402517,\n",
" 'index': 294,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.6312899,\n",
" 'index': 296,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.2447523,\n",
" 'index': 298,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'r5:argM|fin',\n",
" 'score': 0.21691667,\n",
" 'index': 300,\n",
" 'word': 'to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.0507591,\n",
" 'index': 303,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.110183075,\n",
" 'index': 304,\n",
" 'word': 'were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0827311,\n",
" 'index': 305,\n",
" 'word': '##n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.07966802,\n",
" 'index': 308,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'r8:argM|tmp',\n",
" 'score': 0.047043335,\n",
" 'index': 310,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.037761096,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.11788476,\n",
" 'index': 326,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.061609197,\n",
" 'index': 328,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.04639362,\n",
" 'index': 329,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.10497463,\n",
" 'index': 330,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.037833076,\n",
" 'index': 332,\n",
" 'word': 'times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04543908,\n",
" 'index': 333,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.043668192,\n",
" 'index': 334,\n",
" 'word': '##er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.06038779,\n",
" 'index': 339,\n",
" 'word': 'photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08361747,\n",
" 'index': 341,\n",
" 'word': 'reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0819967,\n",
" 'index': 342,\n",
" 'word': '##ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.050429754,\n",
" 'index': 345,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.055836514,\n",
" 'index': 346,\n",
" 'word': '##form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.030725654,\n",
" 'index': 348,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07811248,\n",
" 'index': 349,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.056091927,\n",
" 'index': 352,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.037297357,\n",
" 'index': 357,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08379,\n",
" 'index': 358,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.046028677,\n",
" 'index': 362,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.030750155,\n",
" 'index': 363,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.029666642,\n",
" 'index': 366,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07766737,\n",
" 'index': 368,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.037586644,\n",
" 'index': 370,\n",
" 'word': 'marking',\n",
" 'start': 1562,\n",
" 'end': 1569},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.039157256,\n",
" 'index': 371,\n",
" 'word': '##s',\n",
" 'start': 1569,\n",
" 'end': 1570},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.06878179,\n",
" 'index': 372,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.06736001,\n",
" 'index': 373,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.030598668,\n",
" 'index': 374,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04365823,\n",
" 'index': 381,\n",
" 'word': 'ye',\n",
" 'start': 1601,\n",
" 'end': 1603},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04754055,\n",
" 'index': 382,\n",
" 'word': '##s',\n",
" 'start': 1603,\n",
" 'end': 1604},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.04762847,\n",
" 'index': 384,\n",
" 'word': 'rum',\n",
" 'start': 1610,\n",
" 'end': 1613},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.038992863,\n",
" 'index': 385,\n",
" 'word': '##or',\n",
" 'start': 1613,\n",
" 'end': 1615},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.06939269,\n",
" 'index': 386,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08153154,\n",
" 'index': 390,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.042257246,\n",
" 'index': 391,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04214874,\n",
" 'index': 392,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'r8:argM|tmp',\n",
" 'score': 0.036950577,\n",
" 'index': 393,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.033291806,\n",
" 'index': 398,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0738949,\n",
" 'index': 399,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08140796,\n",
" 'index': 401,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.04987211,\n",
" 'index': 403,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.06430703,\n",
" 'index': 405,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.038376,\n",
" 'index': 406,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.03541435,\n",
" 'index': 407,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.074262,\n",
" 'index': 408,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.044221986,\n",
" 'index': 410,\n",
" 'word': 'cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.039963495,\n",
" 'index': 413,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.03479836,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.032437824,\n",
" 'index': 419,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08305774,\n",
" 'index': 420,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'r9:arg1|pat',\n",
" 'score': 0.04045077,\n",
" 'index': 424,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.066850476,\n",
" 'index': 425,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.03912363,\n",
" 'index': 427,\n",
" 'word': 'camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0366046,\n",
" 'index': 434,\n",
" 'word': 'With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.034650415,\n",
" 'index': 437,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07881917,\n",
" 'index': 439,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.08386986,\n",
" 'index': 440,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.035086773,\n",
" 'index': 441,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.033257537,\n",
" 'index': 442,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.035879184,\n",
" 'index': 455,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.0820557,\n",
" 'index': 456,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.045568023,\n",
" 'index': 458,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07430672,\n",
" 'index': 459,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.048614994,\n",
" 'index': 461,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.040848035,\n",
" 'index': 465,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.06783971,\n",
" 'index': 466,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.043203566,\n",
" 'index': 467,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07753727,\n",
" 'index': 468,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.03982749,\n",
" 'index': 469,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.050202448,\n",
" 'index': 470,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07350303,\n",
" 'index': 471,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.041813992,\n",
" 'index': 473,\n",
" 'word': 'What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.043242317,\n",
" 'index': 475,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.071942516,\n",
" 'index': 476,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.065713726,\n",
" 'index': 477,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.047005344,\n",
" 'index': 479,\n",
" 'word': 'but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.053594884,\n",
" 'index': 480,\n",
" 'word': '##te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.038346287,\n",
" 'index': 484,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.071865395,\n",
" 'index': 485,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.052885562,\n",
" 'index': 486,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.058789182,\n",
" 'index': 487,\n",
" 'word': '##form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.05849257,\n",
" 'index': 488,\n",
" 'word': '##s',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.05117639,\n",
" 'index': 489,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.051266044,\n",
" 'index': 490,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089}]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_enpt_mBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_enpt_mBERT\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "a5c41e30-7978-45e4-9eba-c3eaf8fcd77d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"r0:arg1|tem 1\n",
"r0:root 1\n",
"r10:root 5\n",
"r1:arg1|pat 1\n",
"r1:arg1|tem 1\n",
"r1:arg2|atr 1\n",
"r1:root 1\n",
"r2:root 1\n",
"r4:arg1|tem 1\n",
"r4:arg2|atr 1\n",
"r5:arg1|tem 1\n",
"r5:argM|fin 1\n",
"r5:root 2\n",
"r6:arg0|agt 4\n",
"r6:arg2|atr 1\n",
"r6:root 1\n",
"r7:arg0|agt 1\n",
"r7:arg1|pat 5\n",
"r7:arg1|tem 2\n",
"r7:arg2|atr 1\n",
"r7:root 10\n",
"r8:arg0|agt 8\n",
"r8:arg1|pat 4\n",
"r8:arg1|tem 8\n",
"r8:arg2|atr 18\n",
"r8:argM|tmp 3\n",
"r8:root 3\n",
"r9:arg1|pat 12\n",
"r9:root 102\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"r0:arg1|tem you 1\n",
"r0:root re 1\n",
"r10:root hidden 1\n",
" in 1\n",
" picture 1\n",
" ..\n",
"r9:root what 1\n",
" which 2\n",
" would 1\n",
" ye 1\n",
" you 2\n",
"Length: 174, dtype: int64"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"10 mbrutonspa_enpt_mBERT.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "6da949b6-cae9-409d-a05d-d93f9f634bfd",
"metadata": {},
"source": [
"## 11 benjamin/wtp-bert-mini"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "6b9d327a-5f62-435e-923e-9a51d91c958a",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'bert-char'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[80], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 2\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-mini\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-mini\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-bert-mini\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-bert-mini\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "markdown",
"id": "6b2ab65e-9856-45a9-aa54-c230abfe8d4b",
"metadata": {},
"source": [
"## 12 Babelscapewikineural-multilingual-ner"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "d9c12157-5280-4753-b0e6-b1cd7de813bd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.9951147,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.6191017,\n",
" 'index': 23,\n",
" 'word': 'Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.5708344,\n",
" 'index': 24,\n",
" 'word': 'On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.5786274,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.877606,\n",
" 'index': 37,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9051992,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9834109,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.48295248,\n",
" 'index': 98,\n",
" 'word': 'Mart',\n",
" 'start': 407,\n",
" 'end': 411},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.47647634,\n",
" 'index': 99,\n",
" 'word': '##ian',\n",
" 'start': 411,\n",
" 'end': 414},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.97810775,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9512793,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9480485,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.4522933,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.43941417,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.33125964,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.2833639,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9865861,\n",
" 'index': 194,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.814626,\n",
" 'index': 205,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.7951029,\n",
" 'index': 215,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.87542975,\n",
" 'index': 263,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.99368435,\n",
" 'index': 282,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.7881979,\n",
" 'index': 291,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9987973,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99919313,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99887544,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.8786556,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9564052,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.96700704,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.89913684,\n",
" 'index': 338,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.97243416,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.963992,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.7103455,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.5068105,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Babelscape/wikineural-multilingual-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Babelscape/wikineural-multilingual-ner\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "60a70391-65f5-4c73-9cd3-553d635386d9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 7\n",
"B-ORG 3\n",
"B-PER 2\n",
"I-LOC 2\n",
"I-MISC 15\n",
"I-PER 4\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC C 1\n",
" Egypt 1\n",
" Mars 5\n",
"B-ORG NASA 3\n",
"B-PER Mali 1\n",
" Michael 1\n",
"I-LOC ##onia 1\n",
" ##yd 1\n",
"I-MISC ##biter 1\n",
" ##ian 1\n",
" ##ion 1\n",
" 1 1\n",
" American 1\n",
" Face 1\n",
" Mars 2\n",
" Mart 1\n",
" On 1\n",
" Or 1\n",
" Ph 1\n",
" Viking 2\n",
" West 1\n",
"I-PER ##ara 1\n",
" ##n 2\n",
" Mali 1\n",
"dtype: int64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"12 Babelscapewikineural-multilingual-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "b152a918-a4b1-4f0d-aa5a-48158eb9bee8",
"metadata": {},
"source": [
"## 13 julian-schelb/roberta-ner-multilingual"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "982634a2-a7f4-4c9d-92d4-f3b34ba89931",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.8837392,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.38925776,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.7155649,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.64458394,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.623109,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.34994408,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.32543704,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.4674562,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.53341544,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.50114465,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.5386203,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.6421071,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.521761,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.66133285,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.87530375,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.775388,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.74558014,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.77897793,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.781982,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.5271412,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"julian-schelb/roberta-ner-multilingual\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"julian-schelb/roberta-ner-multilingual\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"classifier(text)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "1836b86a-e0c8-42c4-b199-b45b3e4c6235",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 4\n",
"B-ORG 7\n",
"B-PER 1\n",
"I-LOC 1\n",
"I-ORG 3\n",
"I-PER 4\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC do 1\n",
" nia 1\n",
" ▁American 1\n",
" ▁Cy 1\n",
"B-ORG ion 1\n",
" ▁Egypt 1\n",
" ▁Mars 1\n",
" ▁Marti 1\n",
" ▁NASA 3\n",
"B-PER ▁Michael 1\n",
"I-LOC ▁West 1\n",
"I-ORG bit 1\n",
" er 1\n",
" ▁Or 1\n",
"I-PER a 1\n",
" oh 1\n",
" ▁Malin 1\n",
" ▁Phar 1\n",
"dtype: int64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"13 julian-schelbroberta-ner-multilingual.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "16569163-0440-4747-8af0-167e67d576a5",
"metadata": {},
"source": [
"## 14 FacebookAI/xlm-roberta-large-finetuned-conll03-german"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "3792dc25-ec55-4d6d-822c-e921c9129cd0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at FacebookAI/xlm-roberta-large-finetuned-conll03-english were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
"- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'I-ORG', 'score': 0.9999913, 'index': 8, 'word': '▁NASA', 'start': 16, 'end': 20}, {'entity': 'I-MISC', 'score': 0.9999641, 'index': 23, 'word': '▁Face', 'start': 88, 'end': 92}, {'entity': 'I-MISC', 'score': 0.99989665, 'index': 24, 'word': '▁On', 'start': 93, 'end': 95}, {'entity': 'I-MISC', 'score': 0.97350365, 'index': 25, 'word': '▁Mars', 'start': 96, 'end': 100}, {'entity': 'I-LOC', 'score': 0.9999362, 'index': 36, 'word': '▁Mars', 'start': 152, 'end': 156}, {'entity': 'I-MISC', 'score': 0.9992086, 'index': 58, 'word': '▁Viking', 'start': 240, 'end': 246}, {'entity': 'I-MISC', 'score': 0.9989502, 'index': 59, 'word': '▁1', 'start': 247, 'end': 248}, {'entity': 'I-MISC', 'score': 0.999977, 'index': 97, 'word': '▁Marti', 'start': 407, 'end': 412}, {'entity': 'I-MISC', 'score': 0.99619055, 'index': 98, 'word': 'an', 'start': 412, 'end': 414}, {'entity': 'I-LOC', 'score': 0.9999354, 'index': 103, 'word': '▁Cy', 'start': 435, 'end': 437}, {'entity': 'I-LOC', 'score': 0.99994576, 'index': 104, 'word': 'do', 'start': 437, 'end': 439}, {'entity': 'I-LOC', 'score': 0.99992585, 'index': 105, 'word': 'nia', 'start': 439, 'end': 442}, {'entity': 'I-MISC', 'score': 0.9999789, 'index': 119, 'word': '▁Egypt', 'start': 496, 'end': 501}, {'entity': 'I-MISC', 'score': 0.9614088, 'index': 120, 'word': 'ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.99997246, 'index': 193, 'word': '▁NASA', 'start': 801, 'end': 805}, {'entity': 'I-LOC', 'score': 0.99979633, 'index': 205, 'word': '▁Mars', 'start': 843, 'end': 847}, {'entity': 'I-LOC', 'score': 0.9998061, 'index': 215, 'word': '▁Mars', 'start': 875, 'end': 879}, {'entity': 'I-LOC', 'score': 0.99984956, 'index': 264, 'word': '▁Mars', 'start': 1088, 'end': 1092}, {'entity': 'I-ORG', 'score': 0.99996305, 'index': 285, 'word': '▁NASA', 'start': 1169, 'end': 1173}, {'entity': 'I-LOC', 'score': 0.9998203, 'index': 295, 'word': '▁Mars', 'start': 1220, 'end': 1224}, {'entity': 'I-PER', 'score': 0.9999932, 
'index': 319, 'word': '▁Michael', 'start': 1312, 'end': 1319}, {'entity': 'I-PER', 'score': 0.99999106, 'index': 320, 'word': '▁Malin', 'start': 1320, 'end': 1325}, {'entity': 'I-MISC', 'score': 0.94105357, 'index': 323, 'word': '▁Mars', 'start': 1334, 'end': 1338}, {'entity': 'I-MISC', 'score': 0.9839579, 'index': 324, 'word': '▁Or', 'start': 1339, 'end': 1341}, {'entity': 'I-MISC', 'score': 0.9913346, 'index': 325, 'word': 'bit', 'start': 1341, 'end': 1344}, {'entity': 'I-MISC', 'score': 0.9759228, 'index': 326, 'word': 'er', 'start': 1344, 'end': 1346}, {'entity': 'I-MISC', 'score': 0.999749, 'index': 341, 'word': '▁Viking', 'start': 1419, 'end': 1425}, {'entity': 'I-PER', 'score': 0.9999914, 'index': 416, 'word': '▁Malin', 'start': 1745, 'end': 1750}, {'entity': 'I-MISC', 'score': 0.92417294, 'index': 491, 'word': '▁American', 'start': 2094, 'end': 2102}, {'entity': 'I-LOC', 'score': 0.99954396, 'index': 492, 'word': '▁West', 'start': 2103, 'end': 2107}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-german\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-english\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)\n"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "356a5837-b0f4-4c63-95f0-42a562b8553e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"I-LOC 9\n",
"I-MISC 15\n",
"I-ORG 3\n",
"I-PER 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"I-LOC do 1\n",
" nia 1\n",
" ▁Cy 1\n",
" ▁Mars 5\n",
" ▁West 1\n",
"I-MISC an 1\n",
" bit 1\n",
" er 1\n",
" ion 1\n",
" ▁1 1\n",
" ▁American 1\n",
" ▁Egypt 1\n",
" ▁Face 1\n",
" ▁Mars 2\n",
" ▁Marti 1\n",
" ▁On 1\n",
" ▁Or 1\n",
" ▁Viking 2\n",
"I-ORG ▁NASA 3\n",
"I-PER ▁Malin 2\n",
" ▁Michael 1\n",
"dtype: int64"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"14 FacebookAIxlm-roberta-large-finetuned-conll03-german.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "f34e36ec-8d26-49b4-89a5-15eaa203e168",
"metadata": {},
"source": [
"## 15 jplu/tf-xlm-r-ner-40-lang"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "293a3320-8385-42a0-9aaf-fd24487ade80",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: transformers[sentencepiece] in c:\\users\\nw\\anaconda3\\lib\\site-packages (4.41.2)\n",
"Requirement already satisfied: filelock in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (3.13.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.23.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.23.4)\n",
"Requirement already satisfied: numpy>=1.17 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (1.26.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (23.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (6.0.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (2023.10.3)\n",
"Requirement already satisfied: requests in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (2.32.3)\n",
"Requirement already satisfied: tokenizers<0.20,>=0.19 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.19.1)\n",
"Requirement already satisfied: safetensors>=0.4.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.4.3)\n",
"Requirement already satisfied: tqdm>=4.27 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (4.66.4)\n",
"Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.2.0)\n",
"Requirement already satisfied: protobuf in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (3.20.3)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from huggingface-hub<1.0,>=0.23.0->transformers[sentencepiece]) (2023.10.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from huggingface-hub<1.0,>=0.23.0->transformers[sentencepiece]) (4.9.0)\n",
"Requirement already satisfied: colorama in c:\\users\\nw\\anaconda3\\lib\\site-packages (from tqdm>=4.27->transformers[sentencepiece]) (0.4.6)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2.0.4)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2024.2.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install transformers[sentencepiece]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a6a0c627-2ac4-48f0-8870-fd1a2479245c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some layers from the model checkpoint at jplu/tf-xlm-r-ner-40-lang were not used when initializing TFXLMRobertaForTokenClassification: ['dropout_38']\n",
"- This IS expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"All the layers of TFXLMRobertaForTokenClassification were initialized from the model checkpoint at jplu/tf-xlm-r-ner-40-lang.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaForTokenClassification for predictions without further training.\n",
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'ORG',\n",
" 'score': 0.94845986,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 15,\n",
" 'end': 20},\n",
" {'entity': 'ORG',\n",
" 'score': 0.7998288,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 239,\n",
" 'end': 246},\n",
" {'entity': 'ORG',\n",
" 'score': 0.7579509,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 246,\n",
" 'end': 248},\n",
" {'entity': 'ORG',\n",
" 'score': 0.52262145,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 248,\n",
" 'end': 254},\n",
" {'entity': 'ORG',\n",
" 'score': 0.5880066,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 800,\n",
" 'end': 805},\n",
" {'entity': 'ORG',\n",
" 'score': 0.7987309,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1168,\n",
" 'end': 1173},\n",
" {'entity': 'PER',\n",
" 'score': 0.9565463,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1311,\n",
" 'end': 1319},\n",
" {'entity': 'PER',\n",
" 'score': 0.9528012,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1319,\n",
" 'end': 1325},\n",
" {'entity': 'ORG',\n",
" 'score': 0.5696624,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1333,\n",
" 'end': 1338},\n",
" {'entity': 'LOC',\n",
" 'score': 0.93097985,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2093,\n",
" 'end': 2102},\n",
" {'entity': 'LOC',\n",
" 'score': 0.89757425,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2102,\n",
" 'end': 2107}]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import pipeline\n",
"\n",
"nlp_ner = pipeline(\n",
" \"ner\",\n",
" model=\"jplu/tf-xlm-r-ner-40-lang\",\n",
" tokenizer=(\n",
" 'jplu/tf-xlm-r-ner-40-lang'),\n",
" framework=\"tf\"\n",
")\n",
"\n",
"nlp_ner(text)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8ea0424d-e2da-4958-be66-2dc6e6556456",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"LOC 2\n",
"ORG 7\n",
"PER 2\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"LOC ▁American 1\n",
" ▁West 1\n",
"ORG ▁1 1\n",
" ▁Mars 1\n",
" ▁NASA 3\n",
" ▁Viking 1\n",
" ▁space 1\n",
"PER ▁Malin 1\n",
" ▁Michael 1\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"15 jplutf-xlm-r-ner-40-lang.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "6fc9092a-d4c6-4365-9d73-205ea982bde6",
"metadata": {},
"source": [
"## 16 sagorsarker/codeswitch-spaeng-lid-lince"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "c71bfa05-c1e7-4bae-bcc6-78962e062f1f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at sagorsarker/codeswitch-spaeng-lid-lince were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n",
"- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'en',\n",
" 'score': 0.9998629,\n",
" 'index': 1,\n",
" 'word': 'So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'other',\n",
" 'score': 0.9999267,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'en',\n",
" 'score': 0.99985707,\n",
" 'index': 3,\n",
" 'word': 'if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'en',\n",
" 'score': 0.99984396,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'en',\n",
" 'score': 0.9998392,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'en',\n",
" 'score': 0.9998178,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'en',\n",
" 'score': 0.99961334,\n",
" 'index': 7,\n",
" 'word': 'a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'ne',\n",
" 'score': 0.99565876,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'en',\n",
" 'score': 0.9997341,\n",
" 'index': 9,\n",
" 'word': 'scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'other',\n",
" 'score': 0.99992585,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'en',\n",
" 'score': 0.99986625,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'en',\n",
" 'score': 0.99986553,\n",
" 'index': 12,\n",
" 'word': 'should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'en',\n",
" 'score': 0.9998518,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'en',\n",
" 'score': 0.9998604,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'en',\n",
" 'score': 0.99985576,\n",
" 'index': 15,\n",
" 'word': 'to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'en',\n",
" 'score': 0.999874,\n",
" 'index': 16,\n",
" 'word': 'tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'en',\n",
" 'score': 0.9998467,\n",
" 'index': 17,\n",
" 'word': 'me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'en',\n",
" 'score': 0.99984443,\n",
" 'index': 18,\n",
" 'word': 'the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'en',\n",
" 'score': 0.99985886,\n",
" 'index': 19,\n",
" 'word': 'whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'en',\n",
" 'score': 0.9998666,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'en',\n",
" 'score': 0.99987817,\n",
" 'index': 21,\n",
" 'word': 'about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'en',\n",
" 'score': 0.99987066,\n",
" 'index': 22,\n",
" 'word': 'the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'en',\n",
" 'score': 0.9998729,\n",
" 'index': 23,\n",
" 'word': 'Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'en',\n",
" 'score': 0.9998679,\n",
" 'index': 24,\n",
" 'word': 'On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'ne',\n",
" 'score': 0.9943815,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'other',\n",
" 'score': 0.9999286,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'en',\n",
" 'score': 0.9998467,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'en',\n",
" 'score': 0.9998258,\n",
" 'index': 28,\n",
" 'word': 'obvious',\n",
" 'start': 108,\n",
" 'end': 115},\n",
" {'entity': 'en',\n",
" 'score': 0.99984264,\n",
" 'index': 29,\n",
" 'word': '##ly',\n",
" 'start': 115,\n",
" 'end': 117},\n",
" {'entity': 'en',\n",
" 'score': 0.9998216,\n",
" 'index': 30,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'en',\n",
" 'score': 0.9998173,\n",
" 'index': 31,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'en',\n",
" 'score': 0.9998375,\n",
" 'index': 32,\n",
" 'word': 'that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'en',\n",
" 'score': 0.9998186,\n",
" 'index': 33,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'en',\n",
" 'score': 0.99982053,\n",
" 'index': 34,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'en',\n",
" 'score': 0.99982506,\n",
" 'index': 35,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'en',\n",
" 'score': 0.99981946,\n",
" 'index': 36,\n",
" 'word': 'on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'ne',\n",
" 'score': 0.99421823,\n",
" 'index': 37,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'other',\n",
" 'score': 0.9999292,\n",
" 'index': 38,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'en',\n",
" 'score': 0.99983776,\n",
" 'index': 39,\n",
" 'word': 'and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'en',\n",
" 'score': 0.9998344,\n",
" 'index': 40,\n",
" 'word': 'that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'en',\n",
" 'score': 0.9997547,\n",
" 'index': 41,\n",
" 'word': 'the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'en',\n",
" 'score': 0.99978274,\n",
" 'index': 42,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'en',\n",
" 'score': 0.9997836,\n",
" 'index': 43,\n",
" 'word': 'was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'en',\n",
" 'score': 0.99971956,\n",
" 'index': 44,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'en',\n",
" 'score': 0.99975187,\n",
" 'index': 45,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'en',\n",
" 'score': 0.9997137,\n",
" 'index': 46,\n",
" 'word': 'alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': 'en',\n",
" 'score': 0.99977976,\n",
" 'index': 47,\n",
" 'word': '##s',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'other',\n",
" 'score': 0.9999249,\n",
" 'index': 48,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'en',\n",
" 'score': 0.99981195,\n",
" 'index': 49,\n",
" 'word': 'correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'other',\n",
" 'score': 0.9999312,\n",
" 'index': 50,\n",
" 'word': '?',\n",
" 'start': 206,\n",
" 'end': 207},\n",
" {'entity': 'other',\n",
" 'score': 0.9999089,\n",
" 'index': 51,\n",
" 'word': '\"',\n",
" 'start': 207,\n",
" 'end': 208},\n",
" {'entity': 'en',\n",
" 'score': 0.9864689,\n",
" 'index': 52,\n",
" 'word': 'No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'other',\n",
" 'score': 0.99993014,\n",
" 'index': 53,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'en',\n",
" 'score': 0.9997601,\n",
" 'index': 54,\n",
" 'word': 'twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'en',\n",
" 'score': 0.9996724,\n",
" 'index': 55,\n",
" 'word': 'five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'en',\n",
" 'score': 0.99973506,\n",
" 'index': 56,\n",
" 'word': 'years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'en',\n",
" 'score': 0.9997675,\n",
" 'index': 57,\n",
" 'word': 'ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'other',\n",
" 'score': 0.9999273,\n",
" 'index': 58,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'en',\n",
" 'score': 0.9992092,\n",
" 'index': 59,\n",
" 'word': 'our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'ne',\n",
" 'score': 0.62706536,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'other',\n",
" 'score': 0.9996927,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'en',\n",
" 'score': 0.9995969,\n",
" 'index': 62,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'en',\n",
" 'score': 0.9997328,\n",
" 'index': 63,\n",
" 'word': 'was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'en',\n",
" 'score': 0.9997286,\n",
" 'index': 64,\n",
" 'word': 'ci',\n",
" 'start': 264,\n",
" 'end': 266},\n",
" {'entity': 'en',\n",
" 'score': 0.9997693,\n",
" 'index': 65,\n",
" 'word': '##rc',\n",
" 'start': 266,\n",
" 'end': 268},\n",
" {'entity': 'en',\n",
" 'score': 0.99981683,\n",
" 'index': 66,\n",
" 'word': '##ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'en',\n",
" 'score': 0.99974436,\n",
" 'index': 67,\n",
" 'word': 'the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'en',\n",
" 'score': 0.999772,\n",
" 'index': 68,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'other',\n",
" 'score': 0.9999294,\n",
" 'index': 69,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'en',\n",
" 'score': 0.9995789,\n",
" 'index': 70,\n",
" 'word': 'sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'en',\n",
" 'score': 0.9996741,\n",
" 'index': 71,\n",
" 'word': '##pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'en',\n",
" 'score': 0.99957246,\n",
" 'index': 72,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'other',\n",
" 'score': 0.99993145,\n",
" 'index': 73,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'en',\n",
" 'score': 0.9997521,\n",
" 'index': 74,\n",
" 'word': 'when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'en',\n",
" 'score': 0.99976677,\n",
" 'index': 75,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'en',\n",
" 'score': 0.99971706,\n",
" 'index': 76,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'en',\n",
" 'score': 0.99961495,\n",
" 'index': 77,\n",
" 'word': 'the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'en',\n",
" 'score': 0.9996594,\n",
" 'index': 78,\n",
" 'word': 'sh',\n",
" 'start': 322,\n",
" 'end': 324},\n",
" {'entity': 'en',\n",
" 'score': 0.99976844,\n",
" 'index': 79,\n",
" 'word': '##adow',\n",
" 'start': 324,\n",
" 'end': 328},\n",
" {'entity': 'en',\n",
" 'score': 0.9997464,\n",
" 'index': 80,\n",
" 'word': '##y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'en',\n",
" 'score': 0.9997322,\n",
" 'index': 81,\n",
" 'word': 'like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'en',\n",
" 'score': 0.99969065,\n",
" 'index': 82,\n",
" 'word': '##ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'en',\n",
" 'score': 0.9996531,\n",
" 'index': 83,\n",
" 'word': 'of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'en',\n",
" 'score': 0.9992725,\n",
" 'index': 84,\n",
" 'word': 'a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'en',\n",
" 'score': 0.99959975,\n",
" 'index': 85,\n",
" 'word': 'human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'en',\n",
" 'score': 0.99973136,\n",
" 'index': 86,\n",
" 'word': 'face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'other',\n",
" 'score': 0.9999311,\n",
" 'index': 87,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'en',\n",
" 'score': 0.9996131,\n",
" 'index': 88,\n",
" 'word': 'Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'en',\n",
" 'score': 0.9996656,\n",
" 'index': 89,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'en',\n",
" 'score': 0.99980336,\n",
" 'index': 90,\n",
" 'word': 'figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'en',\n",
" 'score': 0.999795,\n",
" 'index': 91,\n",
" 'word': '##d',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'en',\n",
" 'score': 0.99979943,\n",
" 'index': 92,\n",
" 'word': 'out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'en',\n",
" 'score': 0.9997974,\n",
" 'index': 93,\n",
" 'word': 'that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'en',\n",
" 'score': 0.99973387,\n",
" 'index': 94,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'en',\n",
" 'score': 0.9997162,\n",
" 'index': 95,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'en',\n",
" 'score': 0.9996761,\n",
" 'index': 96,\n",
" 'word': 'just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'en',\n",
" 'score': 0.9995012,\n",
" 'index': 97,\n",
" 'word': 'another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'en',\n",
" 'score': 0.92382264,\n",
" 'index': 98,\n",
" 'word': 'Mart',\n",
" 'start': 407,\n",
" 'end': 411},\n",
" {'entity': 'en',\n",
" 'score': 0.9973562,\n",
" 'index': 99,\n",
" 'word': '##ian',\n",
" 'start': 411,\n",
" 'end': 414},\n",
" {'entity': 'en',\n",
" 'score': 0.7907492,\n",
" 'index': 100,\n",
" 'word': 'mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'other',\n",
" 'score': 0.9999194,\n",
" 'index': 101,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'en',\n",
" 'score': 0.9995608,\n",
" 'index': 102,\n",
" 'word': 'common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'en',\n",
" 'score': 0.9996966,\n",
" 'index': 103,\n",
" 'word': 'around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'ne',\n",
" 'score': 0.9975068,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'ne',\n",
" 'score': 0.9957877,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'ne',\n",
" 'score': 0.9961337,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'other',\n",
" 'score': 0.9999093,\n",
" 'index': 107,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'en',\n",
" 'score': 0.99976605,\n",
" 'index': 108,\n",
" 'word': 'only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'en',\n",
" 'score': 0.9997769,\n",
" 'index': 109,\n",
" 'word': 'this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'en',\n",
" 'score': 0.99976176,\n",
" 'index': 110,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'en',\n",
" 'score': 0.9997123,\n",
" 'index': 111,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'en',\n",
" 'score': 0.9997372,\n",
" 'index': 112,\n",
" 'word': 'sh',\n",
" 'start': 462,\n",
" 'end': 464},\n",
" {'entity': 'en',\n",
" 'score': 0.9998115,\n",
" 'index': 113,\n",
" 'word': '##adow',\n",
" 'start': 464,\n",
" 'end': 468},\n",
" {'entity': 'en',\n",
" 'score': 0.99980253,\n",
" 'index': 114,\n",
" 'word': '##s',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'en',\n",
" 'score': 0.99979705,\n",
" 'index': 115,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'en',\n",
" 'score': 0.99979264,\n",
" 'index': 116,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'en',\n",
" 'score': 0.9998178,\n",
" 'index': 117,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'en',\n",
" 'score': 0.99979025,\n",
" 'index': 118,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'en',\n",
" 'score': 0.99977773,\n",
" 'index': 119,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'en',\n",
" 'score': 0.99962866,\n",
" 'index': 120,\n",
" 'word': 'an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'ne',\n",
" 'score': 0.68299395,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'en',\n",
" 'score': 0.9760886,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'en',\n",
" 'score': 0.9997099,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'en',\n",
" 'score': 0.9998056,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'en',\n",
" 'score': 0.99973387,\n",
" 'index': 125,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'other',\n",
" 'score': 0.9999268,\n",
" 'index': 126,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'en',\n",
" 'score': 0.99796546,\n",
" 'index': 127,\n",
" 'word': 'Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'en',\n",
" 'score': 0.9990651,\n",
" 'index': 128,\n",
" 'word': 'few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'en',\n",
" 'score': 0.9994499,\n",
" 'index': 129,\n",
" 'word': 'days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'en',\n",
" 'score': 0.9995864,\n",
" 'index': 130,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'other',\n",
" 'score': 0.99990845,\n",
" 'index': 131,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'en',\n",
" 'score': 0.99973947,\n",
" 'index': 132,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'en',\n",
" 'score': 0.99979526,\n",
" 'index': 133,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'en',\n",
" 'score': 0.9997683,\n",
" 'index': 134,\n",
" 'word': 'the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'en',\n",
" 'score': 0.999793,\n",
" 'index': 135,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'en',\n",
" 'score': 0.99985754,\n",
" 'index': 136,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'en',\n",
" 'score': 0.999843,\n",
" 'index': 137,\n",
" 'word': 'all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'en',\n",
" 'score': 0.99983895,\n",
" 'index': 138,\n",
" 'word': 'to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'en',\n",
" 'score': 0.9998221,\n",
" 'index': 139,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'other',\n",
" 'score': 0.99992704,\n",
" 'index': 140,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'en',\n",
" 'score': 0.9997582,\n",
" 'index': 141,\n",
" 'word': 'and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'en',\n",
" 'score': 0.99974746,\n",
" 'index': 142,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'en',\n",
" 'score': 0.9997589,\n",
" 'index': 143,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'en',\n",
" 'score': 0.9997596,\n",
" 'index': 144,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'en',\n",
" 'score': 0.9997719,\n",
" 'index': 145,\n",
" 'word': 'to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'en',\n",
" 'score': 0.99976,\n",
" 'index': 146,\n",
" 'word': 'note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'en',\n",
" 'score': 0.9997385,\n",
" 'index': 147,\n",
" 'word': 'that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'en',\n",
" 'score': 0.999699,\n",
" 'index': 148,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'en',\n",
" 'score': 0.9996177,\n",
" 'index': 149,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'en',\n",
" 'score': 0.99906355,\n",
" 'index': 150,\n",
" 'word': 'a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'en',\n",
" 'score': 0.9993754,\n",
" 'index': 151,\n",
" 'word': 'huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'en',\n",
" 'score': 0.99958175,\n",
" 'index': 152,\n",
" 'word': 'rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'en',\n",
" 'score': 0.9996152,\n",
" 'index': 153,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'en',\n",
" 'score': 0.9995758,\n",
" 'index': 154,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'en',\n",
" 'score': 0.9995844,\n",
" 'index': 155,\n",
" 'word': 'just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'en',\n",
" 'score': 0.99950886,\n",
" 'index': 156,\n",
" 'word': 'res',\n",
" 'start': 642,\n",
" 'end': 645},\n",
" {'entity': 'en',\n",
" 'score': 0.9995715,\n",
" 'index': 157,\n",
" 'word': '##emble',\n",
" 'start': 645,\n",
" 'end': 650},\n",
" {'entity': 'en',\n",
" 'score': 0.99958056,\n",
" 'index': 158,\n",
" 'word': '##d',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'en',\n",
" 'score': 0.99861777,\n",
" 'index': 159,\n",
" 'word': 'a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'en',\n",
" 'score': 0.99934405,\n",
" 'index': 160,\n",
" 'word': 'human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'en',\n",
" 'score': 0.9995927,\n",
" 'index': 161,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'en',\n",
" 'score': 0.9994898,\n",
" 'index': 162,\n",
" 'word': 'and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'en',\n",
" 'score': 0.9995479,\n",
" 'index': 163,\n",
" 'word': 'face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'other',\n",
" 'score': 0.9999279,\n",
" 'index': 164,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'en',\n",
" 'score': 0.9997607,\n",
" 'index': 165,\n",
" 'word': 'but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'en',\n",
" 'score': 0.99969375,\n",
" 'index': 166,\n",
" 'word': 'all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'en',\n",
" 'score': 0.99976605,\n",
" 'index': 167,\n",
" 'word': 'of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'en',\n",
" 'score': 0.99975795,\n",
" 'index': 168,\n",
" 'word': 'it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'en',\n",
" 'score': 0.99969816,\n",
" 'index': 169,\n",
" 'word': 'was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'en',\n",
" 'score': 0.9996636,\n",
" 'index': 170,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'en',\n",
" 'score': 0.9995894,\n",
" 'index': 171,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'en',\n",
" 'score': 0.9996039,\n",
" 'index': 172,\n",
" 'word': 'sh',\n",
" 'start': 703,\n",
" 'end': 705},\n",
" {'entity': 'en',\n",
" 'score': 0.9997403,\n",
" 'index': 173,\n",
" 'word': '##adow',\n",
" 'start': 705,\n",
" 'end': 709},\n",
" {'entity': 'en',\n",
" 'score': 0.9997271,\n",
" 'index': 174,\n",
" 'word': '##s',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'other',\n",
" 'score': 0.9999298,\n",
" 'index': 175,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'en',\n",
" 'score': 0.9996859,\n",
" 'index': 176,\n",
" 'word': 'We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'en',\n",
" 'score': 0.99971765,\n",
" 'index': 177,\n",
" 'word': 'only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'en',\n",
" 'score': 0.99975497,\n",
" 'index': 178,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'en',\n",
" 'score': 0.9997633,\n",
" 'index': 179,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'en',\n",
" 'score': 0.9997826,\n",
" 'index': 180,\n",
" 'word': 'because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'en',\n",
" 'score': 0.99974495,\n",
" 'index': 181,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'en',\n",
" 'score': 0.9997533,\n",
" 'index': 182,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'en',\n",
" 'score': 0.9997551,\n",
" 'index': 183,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'en',\n",
" 'score': 0.99974245,\n",
" 'index': 184,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'en',\n",
" 'score': 0.9996723,\n",
" 'index': 185,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'en',\n",
" 'score': 0.9992391,\n",
" 'index': 186,\n",
" 'word': 'a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'en',\n",
" 'score': 0.9996873,\n",
" 'index': 187,\n",
" 'word': 'good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'en',\n",
" 'score': 0.9997737,\n",
" 'index': 188,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'en',\n",
" 'score': 0.99975866,\n",
" 'index': 189,\n",
" 'word': 'to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'en',\n",
" 'score': 0.99975985,\n",
" 'index': 190,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'en',\n",
" 'score': 0.9996723,\n",
" 'index': 191,\n",
" 'word': 'the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'en',\n",
" 'score': 0.9997609,\n",
" 'index': 192,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'en',\n",
" 'score': 0.9997042,\n",
" 'index': 193,\n",
" 'word': 'with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'ne',\n",
" 'score': 0.99519366,\n",
" 'index': 194,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'en',\n",
" 'score': 0.99966383,\n",
" 'index': 195,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'en',\n",
" 'score': 0.99948585,\n",
" 'index': 196,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'en',\n",
" 'score': 0.9994748,\n",
" 'index': 197,\n",
" 'word': 'findings',\n",
" 'start': 808,\n",
" 'end': 816},\n",
" {'entity': 'other',\n",
" 'score': 0.99991834,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'en',\n",
" 'score': 0.99959546,\n",
" 'index': 199,\n",
" 'word': 'and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'en',\n",
" 'score': 0.9996618,\n",
" 'index': 200,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'en',\n",
" 'score': 0.99977297,\n",
" 'index': 201,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'en',\n",
" 'score': 0.9997421,\n",
" 'index': 202,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'en',\n",
" 'score': 0.9997882,\n",
" 'index': 203,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'en',\n",
" 'score': 0.99970275,\n",
" 'index': 204,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'ne',\n",
" 'score': 0.9933374,\n",
" 'index': 205,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'other',\n",
" 'score': 0.99984145,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'other',\n",
" 'score': 0.9995536,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'en',\n",
" 'score': 0.999739,\n",
" 'index': 208,\n",
" 'word': 'and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'en',\n",
" 'score': 0.9997385,\n",
" 'index': 209,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'en',\n",
" 'score': 0.9997167,\n",
" 'index': 210,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'other',\n",
" 'score': 0.99991965,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'en',\n",
" 'score': 0.9994844,\n",
" 'index': 212,\n",
" 'word': 'The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'en',\n",
" 'score': 0.9995809,\n",
" 'index': 213,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'en',\n",
" 'score': 0.9993393,\n",
" 'index': 214,\n",
" 'word': 'on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'ne',\n",
" 'score': 0.9903474,\n",
" 'index': 215,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'en',\n",
" 'score': 0.99960655,\n",
" 'index': 216,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'en',\n",
" 'score': 0.9992306,\n",
" 'index': 217,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'en',\n",
" 'score': 0.9976654,\n",
" 'index': 218,\n",
" 'word': 'a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'en',\n",
" 'score': 0.99933064,\n",
" 'index': 219,\n",
" 'word': 'pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'en',\n",
" 'score': 0.99950266,\n",
" 'index': 220,\n",
" 'word': 'i',\n",
" 'start': 898,\n",
" 'end': 899},\n",
" {'entity': 'en',\n",
" 'score': 0.99954295,\n",
" 'index': 221,\n",
" 'word': '##con',\n",
" 'start': 899,\n",
" 'end': 902},\n",
" {'entity': 'other',\n",
" 'score': 0.99992657,\n",
" 'index': 222,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'en',\n",
" 'score': 0.9994814,\n",
" 'index': 223,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'en',\n",
" 'score': 0.9994511,\n",
" 'index': 224,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'en',\n",
" 'score': 0.9992155,\n",
" 'index': 225,\n",
" 'word': 'movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'other',\n",
" 'score': 0.99992335,\n",
" 'index': 226,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'en',\n",
" 'score': 0.9993299,\n",
" 'index': 227,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'en',\n",
" 'score': 0.9994456,\n",
" 'index': 228,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'en',\n",
" 'score': 0.9991399,\n",
" 'index': 229,\n",
" 'word': 'books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'other',\n",
" 'score': 0.9999262,\n",
" 'index': 230,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'en',\n",
" 'score': 0.99925786,\n",
" 'index': 231,\n",
" 'word': 'magazines',\n",
" 'start': 939,\n",
" 'end': 948},\n",
" {'entity': 'other',\n",
" 'score': 0.99992716,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'en',\n",
" 'score': 0.99942744,\n",
" 'index': 233,\n",
" 'word': 'radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'en',\n",
" 'score': 0.9996457,\n",
" 'index': 234,\n",
" 'word': 'talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'en',\n",
" 'score': 0.9996043,\n",
" 'index': 235,\n",
" 'word': 'shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'other',\n",
" 'score': 0.9999255,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'en',\n",
" 'score': 0.99946564,\n",
" 'index': 237,\n",
" 'word': 'and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'en',\n",
" 'score': 0.9997336,\n",
" 'index': 238,\n",
" 'word': 'hau',\n",
" 'start': 972,\n",
" 'end': 975},\n",
" {'entity': 'en',\n",
" 'score': 0.99972874,\n",
" 'index': 239,\n",
" 'word': '##nted',\n",
" 'start': 975,\n",
" 'end': 979},\n",
" {'entity': 'en',\n",
" 'score': 0.9996462,\n",
" 'index': 240,\n",
" 'word': 'gr',\n",
" 'start': 980,\n",
" 'end': 982},\n",
" {'entity': 'en',\n",
" 'score': 0.9997458,\n",
" 'index': 241,\n",
" 'word': '##oce',\n",
" 'start': 982,\n",
" 'end': 985},\n",
" {'entity': 'en',\n",
" 'score': 0.9996829,\n",
" 'index': 242,\n",
" 'word': '##ry',\n",
" 'start': 985,\n",
" 'end': 987},\n",
" {'entity': 'en',\n",
" 'score': 0.99970573,\n",
" 'index': 243,\n",
" 'word': 'store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'en',\n",
" 'score': 0.9997999,\n",
" 'index': 244,\n",
" 'word': 'check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'en',\n",
" 'score': 0.9997851,\n",
" 'index': 245,\n",
" 'word': '##out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'en',\n",
" 'score': 0.9997584,\n",
" 'index': 246,\n",
" 'word': 'lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'en',\n",
" 'score': 0.99963033,\n",
" 'index': 247,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'other',\n",
" 'score': 0.90661347,\n",
" 'index': 248,\n",
" 'word': '25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'en',\n",
" 'score': 0.99967,\n",
" 'index': 249,\n",
" 'word': 'years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'other',\n",
" 'score': 0.99992573,\n",
" 'index': 250,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'en',\n",
" 'score': 0.9997687,\n",
" 'index': 251,\n",
" 'word': 'Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'en',\n",
" 'score': 0.9997943,\n",
" 'index': 252,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'en',\n",
" 'score': 0.9997925,\n",
" 'index': 253,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'en',\n",
" 'score': 0.9996296,\n",
" 'index': 254,\n",
" 'word': 'the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'en',\n",
" 'score': 0.99950707,\n",
" 'index': 255,\n",
" 'word': 'natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'en',\n",
" 'score': 0.9997507,\n",
" 'index': 256,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'en',\n",
" 'score': 0.99979764,\n",
" 'index': 257,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'en',\n",
" 'score': 0.99975556,\n",
" 'index': 258,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'en',\n",
" 'score': 0.99978846,\n",
" 'index': 259,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'en',\n",
" 'score': 0.9998209,\n",
" 'index': 260,\n",
" 'word': 'of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'en',\n",
" 'score': 0.99982053,\n",
" 'index': 261,\n",
" 'word': 'life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'en',\n",
" 'score': 0.999806,\n",
" 'index': 262,\n",
" 'word': 'on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'ne',\n",
" 'score': 0.9924434,\n",
" 'index': 263,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'other',\n",
" 'score': 0.9999304,\n",
" 'index': 264,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'en',\n",
" 'score': 0.9998184,\n",
" 'index': 265,\n",
" 'word': 'and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'en',\n",
" 'score': 0.9998319,\n",
" 'index': 266,\n",
" 'word': 'that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'en',\n",
" 'score': 0.9995714,\n",
" 'index': 267,\n",
" 'word': 'us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'en',\n",
" 'score': 0.9997626,\n",
" 'index': 268,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'en',\n",
" 'score': 0.9998241,\n",
" 'index': 269,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'en',\n",
" 'score': 0.9998323,\n",
" 'index': 270,\n",
" 'word': 'to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'en',\n",
" 'score': 0.99986374,\n",
" 'index': 271,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'en',\n",
" 'score': 0.99984705,\n",
" 'index': 272,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'other',\n",
" 'score': 0.99992883,\n",
" 'index': 273,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'en',\n",
" 'score': 0.99986625,\n",
" 'index': 274,\n",
" 'word': 'but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'en',\n",
" 'score': 0.99982065,\n",
" 'index': 275,\n",
" 'word': 'really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'other',\n",
" 'score': 0.9999231,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'en',\n",
" 'score': 0.99983907,\n",
" 'index': 277,\n",
" 'word': 'the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'en',\n",
" 'score': 0.9998418,\n",
" 'index': 278,\n",
" 'word': 'defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'en',\n",
" 'score': 0.9998036,\n",
" 'index': 279,\n",
" 'word': '##s',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'en',\n",
" 'score': 0.9998399,\n",
" 'index': 280,\n",
" 'word': 'of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'en',\n",
" 'score': 0.9997856,\n",
" 'index': 281,\n",
" 'word': 'the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'ne',\n",
" 'score': 0.9955486,\n",
" 'index': 282,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'en',\n",
" 'score': 0.9998142,\n",
" 'index': 283,\n",
" 'word': 'budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'en',\n",
" 'score': 0.9998621,\n",
" 'index': 284,\n",
" 'word': 'wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'en',\n",
" 'score': 0.99985635,\n",
" 'index': 285,\n",
" 'word': 'there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'en',\n",
" 'score': 0.9998217,\n",
" 'index': 286,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'en',\n",
" 'score': 0.9998242,\n",
" 'index': 287,\n",
" 'word': 'ancient',\n",
" 'start': 1196,\n",
" 'end': 1203},\n",
" {'entity': 'en',\n",
" 'score': 0.9998467,\n",
" 'index': 288,\n",
" 'word': 'civili',\n",
" 'start': 1204,\n",
" 'end': 1210},\n",
" {'entity': 'en',\n",
" 'score': 0.9998492,\n",
" 'index': 289,\n",
" 'word': '##zation',\n",
" 'start': 1210,\n",
" 'end': 1216},\n",
" {'entity': 'en',\n",
" 'score': 0.99985075,\n",
" 'index': 290,\n",
" 'word': 'on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'ne',\n",
" 'score': 0.9934476,\n",
" 'index': 291,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'other',\n",
" 'score': 0.9999279,\n",
" 'index': 292,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'en',\n",
" 'score': 0.9996929,\n",
" 'index': 293,\n",
" 'word': 'We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'en',\n",
" 'score': 0.9997583,\n",
" 'index': 294,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'en',\n",
" 'score': 0.99972254,\n",
" 'index': 295,\n",
" 'word': 'to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'en',\n",
" 'score': 0.99963474,\n",
" 'index': 296,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'en',\n",
" 'score': 0.99941206,\n",
" 'index': 297,\n",
" 'word': 'another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'en',\n",
" 'score': 0.99968994,\n",
" 'index': 298,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'en',\n",
" 'score': 0.9997118,\n",
" 'index': 299,\n",
" 'word': 'just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'en',\n",
" 'score': 0.9997631,\n",
" 'index': 300,\n",
" 'word': 'to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'en',\n",
" 'score': 0.99978083,\n",
" 'index': 301,\n",
" 'word': 'make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'en',\n",
" 'score': 0.9998004,\n",
" 'index': 302,\n",
" 'word': 'sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'en',\n",
" 'score': 0.99981683,\n",
" 'index': 303,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'en',\n",
" 'score': 0.99981815,\n",
" 'index': 304,\n",
" 'word': 'were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'en',\n",
" 'score': 0.99982005,\n",
" 'index': 305,\n",
" 'word': '##n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'en',\n",
" 'score': 0.9997904,\n",
" 'index': 306,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'en',\n",
" 'score': 0.999744,\n",
" 'index': 307,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'en',\n",
" 'score': 0.9998004,\n",
" 'index': 308,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'other',\n",
" 'score': 0.99991894,\n",
" 'index': 309,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'en',\n",
" 'score': 0.9986118,\n",
" 'index': 310,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'other',\n",
" 'score': 0.53777504,\n",
" 'index': 311,\n",
" 'word': 'April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'other',\n",
" 'score': 0.9973557,\n",
" 'index': 312,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'other',\n",
" 'score': 0.9998646,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'other',\n",
" 'score': 0.99943346,\n",
" 'index': 314,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'other',\n",
" 'score': 0.9999175,\n",
" 'index': 315,\n",
" 'word': '.',\n",
" 'start': 1310,\n",
" 'end': 1311},\n",
" {'entity': 'ne',\n",
" 'score': 0.99906963,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'ne',\n",
" 'score': 0.9983594,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'ne',\n",
" 'score': 0.9985464,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'en',\n",
" 'score': 0.99965477,\n",
" 'index': 319,\n",
" 'word': 'and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'en',\n",
" 'score': 0.9990552,\n",
" 'index': 320,\n",
" 'word': 'his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'ne',\n",
" 'score': 0.97999007,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'ne',\n",
" 'score': 0.6277367,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'ne',\n",
" 'score': 0.5293862,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'en',\n",
" 'score': 0.9995109,\n",
" 'index': 324,\n",
" 'word': 'camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'en',\n",
" 'score': 0.9995858,\n",
" 'index': 325,\n",
" 'word': 'team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'en',\n",
" 'score': 0.99904686,\n",
" 'index': 326,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'en',\n",
" 'score': 0.99744225,\n",
" 'index': 327,\n",
" 'word': 'a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'en',\n",
" 'score': 0.99924374,\n",
" 'index': 328,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'en',\n",
" 'score': 0.99940455,\n",
" 'index': 329,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'en',\n",
" 'score': 0.9994288,\n",
" 'index': 330,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'en',\n",
" 'score': 0.99890625,\n",
" 'index': 331,\n",
" 'word': 'ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'en',\n",
" 'score': 0.9993057,\n",
" 'index': 332,\n",
" 'word': 'times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'en',\n",
" 'score': 0.99956423,\n",
" 'index': 333,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'en',\n",
" 'score': 0.99929535,\n",
" 'index': 334,\n",
" 'word': '##er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'en',\n",
" 'score': 0.9995135,\n",
" 'index': 335,\n",
" 'word': 'than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'en',\n",
" 'score': 0.99948967,\n",
" 'index': 336,\n",
" 'word': 'the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'en',\n",
" 'score': 0.99922657,\n",
" 'index': 337,\n",
" 'word': 'original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'en',\n",
" 'score': 0.54231477,\n",
" 'index': 338,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'en',\n",
" 'score': 0.9983171,\n",
" 'index': 339,\n",
" 'word': 'photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'other',\n",
" 'score': 0.9999213,\n",
" 'index': 340,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'en',\n",
" 'score': 0.9995522,\n",
" 'index': 341,\n",
" 'word': 'reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'en',\n",
" 'score': 0.9996402,\n",
" 'index': 342,\n",
" 'word': '##ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'en',\n",
" 'score': 0.9978428,\n",
" 'index': 343,\n",
" 'word': 'a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'en',\n",
" 'score': 0.99874496,\n",
" 'index': 344,\n",
" 'word': 'natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'en',\n",
" 'score': 0.99962974,\n",
" 'index': 345,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'en',\n",
" 'score': 0.99966383,\n",
" 'index': 346,\n",
" 'word': '##form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'other',\n",
" 'score': 0.99993026,\n",
" 'index': 347,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'en',\n",
" 'score': 0.99916697,\n",
" 'index': 348,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'en',\n",
" 'score': 0.9983835,\n",
" 'index': 349,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'en',\n",
" 'score': 0.9928202,\n",
" 'index': 350,\n",
" 'word': 'no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'en',\n",
" 'score': 0.9962263,\n",
" 'index': 351,\n",
" 'word': 'alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'en',\n",
" 'score': 0.9987729,\n",
" 'index': 352,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'other',\n",
" 'score': 0.9999304,\n",
" 'index': 353,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'other',\n",
" 'score': 0.99991786,\n",
" 'index': 354,\n",
" 'word': '\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'en',\n",
" 'score': 0.99973565,\n",
" 'index': 355,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'en',\n",
" 'score': 0.9994904,\n",
" 'index': 356,\n",
" 'word': 'that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'en',\n",
" 'score': 0.9995603,\n",
" 'index': 357,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'en',\n",
" 'score': 0.99966073,\n",
" 'index': 358,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'en',\n",
" 'score': 0.99970895,\n",
" 'index': 359,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'en',\n",
" 'score': 0.99959975,\n",
" 'index': 360,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'en',\n",
" 'score': 0.9995778,\n",
" 'index': 361,\n",
" 'word': 'very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'en',\n",
" 'score': 0.99971443,\n",
" 'index': 362,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'en',\n",
" 'score': 0.9997217,\n",
" 'index': 363,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'en',\n",
" 'score': 0.99960893,\n",
" 'index': 364,\n",
" 'word': 'all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'other',\n",
" 'score': 0.99993,\n",
" 'index': 365,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'en',\n",
" 'score': 0.99955565,\n",
" 'index': 366,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'en',\n",
" 'score': 0.99938047,\n",
" 'index': 367,\n",
" 'word': 'could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'en',\n",
" 'score': 0.99928325,\n",
" 'index': 368,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'en',\n",
" 'score': 0.9982596,\n",
" 'index': 369,\n",
" 'word': 'alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'en',\n",
" 'score': 0.99910384,\n",
" 'index': 370,\n",
" 'word': 'marking',\n",
" 'start': 1562,\n",
" 'end': 1569},\n",
" {'entity': 'en',\n",
" 'score': 0.99916995,\n",
" 'index': 371,\n",
" 'word': '##s',\n",
" 'start': 1569,\n",
" 'end': 1570},\n",
" {'entity': 'en',\n",
" 'score': 0.99937785,\n",
" 'index': 372,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'en',\n",
" 'score': 0.99948174,\n",
" 'index': 373,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'en',\n",
" 'score': 0.99941933,\n",
" 'index': 374,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'en',\n",
" 'score': 0.99944514,\n",
" 'index': 375,\n",
" 'word': 'ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': 'en',\n",
" 'score': 0.9994628,\n",
" 'index': 376,\n",
" 'word': '##ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'other',\n",
" 'score': 0.9999194,\n",
" 'index': 377,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'en',\n",
" 'score': 0.9996394,\n",
" 'index': 378,\n",
" 'word': 'Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'en',\n",
" 'score': 0.99759346,\n",
" 'index': 379,\n",
" 'word': 'no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'other',\n",
" 'score': 0.99993074,\n",
" 'index': 380,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'en',\n",
" 'score': 0.9995228,\n",
" 'index': 381,\n",
" 'word': 'ye',\n",
" 'start': 1601,\n",
" 'end': 1603},\n",
" {'entity': 'en',\n",
" 'score': 0.99969923,\n",
" 'index': 382,\n",
" 'word': '##s',\n",
" 'start': 1603,\n",
" 'end': 1604},\n",
" {'entity': 'en',\n",
" 'score': 0.99970835,\n",
" 'index': 383,\n",
" 'word': 'that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'en',\n",
" 'score': 0.999665,\n",
" 'index': 384,\n",
" 'word': 'rum',\n",
" 'start': 1610,\n",
" 'end': 1613},\n",
" {'entity': 'en',\n",
" 'score': 0.99970514,\n",
" 'index': 385,\n",
" 'word': '##or',\n",
" 'start': 1613,\n",
" 'end': 1615},\n",
" {'entity': 'en',\n",
" 'score': 0.99943703,\n",
" 'index': 386,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'other',\n",
" 'score': 0.9999294,\n",
" 'index': 387,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'en',\n",
" 'score': 0.99961936,\n",
" 'index': 388,\n",
" 'word': 'but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'en',\n",
" 'score': 0.99959284,\n",
" 'index': 389,\n",
" 'word': 'to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'en',\n",
" 'score': 0.99967396,\n",
" 'index': 390,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'en',\n",
" 'score': 0.9996499,\n",
" 'index': 391,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'en',\n",
" 'score': 0.9996276,\n",
" 'index': 392,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'en',\n",
" 'score': 0.99944097,\n",
" 'index': 393,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'en',\n",
" 'score': 0.61485404,\n",
" 'index': 394,\n",
" 'word': 'April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'other',\n",
" 'score': 0.99845207,\n",
" 'index': 395,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'other',\n",
" 'score': 0.9998248,\n",
" 'index': 396,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'other',\n",
" 'score': 0.967133,\n",
" 'index': 397,\n",
" 'word': '2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'en',\n",
" 'score': 0.99961036,\n",
" 'index': 398,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'en',\n",
" 'score': 0.9996408,\n",
" 'index': 399,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'en',\n",
" 'score': 0.99961096,\n",
" 'index': 400,\n",
" 'word': 'to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'en',\n",
" 'score': 0.9994717,\n",
" 'index': 401,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'en',\n",
" 'score': 0.99907696,\n",
" 'index': 402,\n",
" 'word': 'another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'en',\n",
" 'score': 0.9993956,\n",
" 'index': 403,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'other',\n",
" 'score': 0.9999089,\n",
" 'index': 404,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'en',\n",
" 'score': 0.9996431,\n",
" 'index': 405,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'en',\n",
" 'score': 0.9997148,\n",
" 'index': 406,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'en',\n",
" 'score': 0.9997253,\n",
" 'index': 407,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'en',\n",
" 'score': 0.99960643,\n",
" 'index': 408,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'en',\n",
" 'score': 0.9989556,\n",
" 'index': 409,\n",
" 'word': 'a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'en',\n",
" 'score': 0.99975353,\n",
" 'index': 410,\n",
" 'word': 'cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'en',\n",
" 'score': 0.999777,\n",
" 'index': 411,\n",
" 'word': '##less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'en',\n",
" 'score': 0.9997631,\n",
" 'index': 412,\n",
" 'word': 'summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'en',\n",
" 'score': 0.99969566,\n",
" 'index': 413,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'other',\n",
" 'score': 0.9999254,\n",
" 'index': 414,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'ne',\n",
" 'score': 0.9956275,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'ne',\n",
" 'score': 0.9934854,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'en',\n",
" 'score': 0.9940276,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'en',\n",
" 'score': 0.9994653,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'en',\n",
" 'score': 0.99972516,\n",
" 'index': 419,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'en',\n",
" 'score': 0.9996685,\n",
" 'index': 420,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'en',\n",
" 'score': 0.99941444,\n",
" 'index': 421,\n",
" 'word': 'an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'en',\n",
" 'score': 0.99968505,\n",
" 'index': 422,\n",
" 'word': 'ama',\n",
" 'start': 1770,\n",
" 'end': 1773},\n",
" {'entity': 'en',\n",
" 'score': 0.99972624,\n",
" 'index': 423,\n",
" 'word': '##zing',\n",
" 'start': 1773,\n",
" 'end': 1777},\n",
" {'entity': 'en',\n",
" 'score': 0.9996338,\n",
" 'index': 424,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'en',\n",
" 'score': 0.9996408,\n",
" 'index': 425,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'en',\n",
" 'score': 0.9994491,\n",
" 'index': 426,\n",
" 'word': 'the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'en',\n",
" 'score': 0.99961936,\n",
" 'index': 427,\n",
" 'word': 'camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'en',\n",
" 'score': 0.9996668,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'en',\n",
" 'score': 0.9995703,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'en',\n",
" 'score': 0.99967444,\n",
" 'index': 430,\n",
" 'word': 'absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'en',\n",
" 'score': 0.99970645,\n",
" 'index': 431,\n",
" 'word': 'maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'en',\n",
" 'score': 0.9996973,\n",
" 'index': 432,\n",
" 'word': 'revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'other',\n",
" 'score': 0.99992955,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'en',\n",
" 'score': 0.9997081,\n",
" 'index': 434,\n",
" 'word': 'With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'en',\n",
" 'score': 0.99963665,\n",
" 'index': 435,\n",
" 'word': 'this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'en',\n",
" 'score': 0.99965286,\n",
" 'index': 436,\n",
" 'word': 'camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'en',\n",
" 'score': 0.9997528,\n",
" 'index': 437,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'en',\n",
" 'score': 0.9997576,\n",
" 'index': 438,\n",
" 'word': 'can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'en',\n",
" 'score': 0.9997657,\n",
" 'index': 439,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'en',\n",
" 'score': 0.9997956,\n",
" 'index': 440,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'en',\n",
" 'score': 0.9997335,\n",
" 'index': 441,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'en',\n",
" 'score': 0.999653,\n",
" 'index': 442,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'en',\n",
" 'score': 0.9991596,\n",
" 'index': 443,\n",
" 'word': 'a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'en',\n",
" 'score': 0.99945194,\n",
" 'index': 444,\n",
" 'word': 'digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'en',\n",
" 'score': 0.9995933,\n",
" 'index': 445,\n",
" 'word': 'image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'other',\n",
" 'score': 0.9999232,\n",
" 'index': 446,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'other',\n",
" 'score': 0.99489,\n",
" 'index': 447,\n",
" 'word': '3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'en',\n",
" 'score': 0.9991105,\n",
" 'index': 448,\n",
" 'word': 'times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'en',\n",
" 'score': 0.9994937,\n",
" 'index': 449,\n",
" 'word': 'bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'en',\n",
" 'score': 0.9995734,\n",
" 'index': 450,\n",
" 'word': 'than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'en',\n",
" 'score': 0.9995715,\n",
" 'index': 451,\n",
" 'word': 'the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'en',\n",
" 'score': 0.9996369,\n",
" 'index': 452,\n",
" 'word': 'pi',\n",
" 'start': 1916,\n",
" 'end': 1918},\n",
" {'entity': 'en',\n",
" 'score': 0.9996636,\n",
" 'index': 453,\n",
" 'word': '##xel',\n",
" 'start': 1918,\n",
" 'end': 1921},\n",
" {'entity': 'en',\n",
" 'score': 0.99961394,\n",
" 'index': 454,\n",
" 'word': 'size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'en',\n",
" 'score': 0.9996786,\n",
" 'index': 455,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'en',\n",
" 'score': 0.9996667,\n",
" 'index': 456,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'en',\n",
" 'score': 0.9997652,\n",
" 'index': 457,\n",
" 'word': 'if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'en',\n",
" 'score': 0.99967515,\n",
" 'index': 458,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'en',\n",
" 'score': 0.9996507,\n",
" 'index': 459,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'en',\n",
" 'score': 0.99955803,\n",
" 'index': 460,\n",
" 'word': 'any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'en',\n",
" 'score': 0.99953544,\n",
" 'index': 461,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'en',\n",
" 'score': 0.99960965,\n",
" 'index': 462,\n",
" 'word': 'of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'en',\n",
" 'score': 0.99961805,\n",
" 'index': 463,\n",
" 'word': 'life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'other',\n",
" 'score': 0.9999225,\n",
" 'index': 464,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'en',\n",
" 'score': 0.9997342,\n",
" 'index': 465,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'en',\n",
" 'score': 0.99973625,\n",
" 'index': 466,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'en',\n",
" 'score': 0.9997397,\n",
" 'index': 467,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'en',\n",
" 'score': 0.99969923,\n",
" 'index': 468,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'en',\n",
" 'score': 0.999673,\n",
" 'index': 469,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'en',\n",
" 'score': 0.9996773,\n",
" 'index': 470,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'en',\n",
" 'score': 0.9995364,\n",
" 'index': 471,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'other',\n",
" 'score': 0.99993205,\n",
" 'index': 472,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'en',\n",
" 'score': 0.99950016,\n",
" 'index': 473,\n",
" 'word': 'What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'en',\n",
" 'score': 0.999288,\n",
" 'index': 474,\n",
" 'word': 'the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'en',\n",
" 'score': 0.99938726,\n",
" 'index': 475,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'en',\n",
" 'score': 0.9994843,\n",
" 'index': 476,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'en',\n",
" 'score': 0.99938786,\n",
" 'index': 477,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'en',\n",
" 'score': 0.99811554,\n",
" 'index': 478,\n",
" 'word': 'the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'en',\n",
" 'score': 0.9786213,\n",
" 'index': 479,\n",
" 'word': 'but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'en',\n",
" 'score': 0.99004257,\n",
" 'index': 480,\n",
" 'word': '##te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'en',\n",
" 'score': 0.99612516,\n",
" 'index': 481,\n",
" 'word': 'or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'en',\n",
" 'score': 0.87278074,\n",
" 'index': 482,\n",
" 'word': 'mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'other',\n",
" 'score': 0.9999268,\n",
" 'index': 483,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'en',\n",
" 'score': 0.99910945,\n",
" 'index': 484,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'en',\n",
" 'score': 0.9985214,\n",
" 'index': 485,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'en',\n",
" 'score': 0.9983,\n",
" 'index': 486,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'en',\n",
" 'score': 0.9991359,\n",
" 'index': 487,\n",
" 'word': '##form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'en',\n",
" 'score': 0.99933463,\n",
" 'index': 488,\n",
" 'word': '##s',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'en',\n",
" 'score': 0.9994253,\n",
" 'index': 489,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'en',\n",
" 'score': 0.9994831,\n",
" 'index': 490,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'en',\n",
" 'score': 0.99935335,\n",
" 'index': 491,\n",
" 'word': 'the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'en',\n",
" 'score': 0.9982742,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'en',\n",
" 'score': 0.99908066,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'other',\n",
" 'score': 0.9999201,\n",
" 'index': 494,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Tokenizer and weights are both fetched from the same Hub repository.\n",
"tokenizer = AutoTokenizer.from_pretrained('sagorsarker/codeswitch-spaeng-lid-lince')\n",
"model = AutoModelForTokenClassification.from_pretrained('sagorsarker/codeswitch-spaeng-lid-lince')\n",
"\n",
"# The 'ner' task returns one prediction dict per sub-word token.\n",
"lid_model = pipeline(task='ner', model=model, tokenizer=tokenizer)\n",
"\n",
"# Last expression of the cell, so the full prediction list is rendered as its output.\n",
"lid_model(text)"
]
},
{
"cell_type": "code",
"execution_count": 130,
"id": "867c77f9-b9d8-4f29-b07f-cab4ee55a8f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"en 411\n",
"ne 22\n",
"other 61\n",
"dtype: int64\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" score | \n",
"
\n",
" \n",
" entity | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" en | \n",
" 411 | \n",
" 0.996325 | \n",
"
\n",
" \n",
" ne | \n",
" 22 | \n",
" 0.925739 | \n",
"
\n",
" \n",
" other | \n",
" 61 | \n",
" 0.990110 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity score\n",
"entity \n",
"en 411 0.996325\n",
"ne 22 0.925739\n",
"other 61 0.990110"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the JSON file for model 16 and convert it with obtener_dataframe.\n",
"with open(\"16 sagorsarkercodeswitch-spaeng-lid-lince.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Number of rows per entity label.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Row count and mean score per entity label. This is the cell's last expression, so it\n",
"# is the value Jupyter displays. The bare `aux` and the per-word groupby that used to sit\n",
"# above it were evaluated and thrown away; use aux.groupby(['entity', 'word']).size()\n",
"# in its own cell if the per-word breakdown is needed.\n",
"aux.groupby(['entity']).agg({'entity': 'size', 'score': 'mean'})"
]
},
{
"cell_type": "markdown",
"id": "88d47015-a58d-4c0b-94b2-05ac3038199a",
"metadata": {},
"source": [
"## 17 jvdzwaan/ocrpostcorrection-task-1"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "dee93220-b20b-4d65-9bd0-18342703e328",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca718fb5673b4223bddcd4cb42fd9596",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/1.02k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:157: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\NW\\.cache\\huggingface\\hub\\models--jvdzwaan--ocrpostcorrection-task-1. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
" warnings.warn(message)\n"
]
},
{
"ename": "OSError",
"evalue": "Can't load tokenizer for 'jvdzwaan/ocrpostcorrection-task-1'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'jvdzwaan/ocrpostcorrection-task-1' is the correct path to a directory containing all relevant files for a BertTokenizerFast tokenizer.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[10], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjvdzwaan/ocrpostcorrection-task-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjvdzwaan/ocrpostcorrection-task-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:899\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 896\u001b[0m tokenizer_class_py, tokenizer_class_fast \u001b[38;5;241m=\u001b[39m TOKENIZER_MAPPING[\u001b[38;5;28mtype\u001b[39m(config)]\n\u001b[0;32m 898\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_fast \u001b[38;5;129;01mand\u001b[39;00m (use_fast \u001b[38;5;129;01mor\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m--> 899\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer_class_fast\u001b[38;5;241m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 900\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 901\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:2094\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[0;32m 2091\u001b[0m \u001b[38;5;66;03m# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be\u001b[39;00m\n\u001b[0;32m 2092\u001b[0m \u001b[38;5;66;03m# loaded directly from the GGUF file.\u001b[39;00m\n\u001b[0;32m 2093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gguf_file:\n\u001b[1;32m-> 2094\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 2095\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2096\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2097\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2098\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2099\u001b[0m )\n\u001b[0;32m 2101\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m 2102\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n",
"\u001b[1;31mOSError\u001b[0m: Can't load tokenizer for 'jvdzwaan/ocrpostcorrection-task-1'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'jvdzwaan/ocrpostcorrection-task-1' is the correct path to a directory containing all relevant files for a BertTokenizerFast tokenizer."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# AutoTokenizer.from_pretrained('jvdzwaan/ocrpostcorrection-task-1') raised OSError\n",
"# (\"Can't load tokenizer ... BertTokenizerFast\"), so the tokenizer is loaded from the\n",
"# base checkpoint instead of the fine-tuned repository.\n",
"# NOTE(review): assumes the model was fine-tuned from 'bert-base-multilingual-cased' --\n",
"# confirm against the model card before trusting the predictions.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"bert-base-multilingual-cased\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"jvdzwaan/ocrpostcorrection-task-1\")\n",
"\n",
"# Fail loudly if the borrowed vocabulary does not line up with the model's vocabulary size,\n",
"# which would indicate that the wrong base checkpoint was chosen above.\n",
"assert tokenizer.vocab_size == model.config.vocab_size, (\n",
"    f\"tokenizer vocab ({tokenizer.vocab_size}) != model vocab ({model.config.vocab_size})\"\n",
")\n",
"\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida = classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "markdown",
"id": "b7babbc0-5f6c-46a4-aea2-65487527ef54",
"metadata": {},
"source": [
"## 18 GEOcite/AuthorParserModel"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a2ffcb2d-bc31-40e1-b4c6-66aeef61a845",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'B-ORG', 'score': 0.56347305, 'index': 8, 'word': 'nasa', 'start': 16, 'end': 20}, {'entity': 'I-ORG', 'score': 0.64977086, 'index': 25, 'word': 'mars', 'start': 96, 'end': 100}, {'entity': 'I-ORG', 'score': 0.5621034, 'index': 37, 'word': 'mars', 'start': 152, 'end': 156}, {'entity': 'B-ORG', 'score': 0.83589464, 'index': 59, 'word': 'viking', 'start': 240, 'end': 246}, {'entity': 'I-ORG', 'score': 0.93603927, 'index': 60, 'word': '1', 'start': 247, 'end': 248}, {'entity': 'I-ORG', 'score': 0.67972714, 'index': 61, 'word': 'spacecraft', 'start': 249, 'end': 259}, {'entity': 'B-LOC', 'score': 0.47661397, 'index': 97, 'word': 'marti', 'start': 407, 'end': 412}, {'entity': 'I-ORG', 'score': 0.5451927, 'index': 98, 'word': '##an', 'start': 412, 'end': 414}, {'entity': 'I-ORG', 'score': 0.60608196, 'index': 99, 'word': 'mesa', 'start': 415, 'end': 419}, {'entity': 'B-LOC', 'score': 0.8834184, 'index': 103, 'word': 'c', 'start': 435, 'end': 436}, {'entity': 'I-LOC', 'score': 0.7397884, 'index': 104, 'word': '##yd', 'start': 436, 'end': 438}, {'entity': 'I-LOC', 'score': 0.9083945, 'index': 105, 'word': '##onia', 'start': 438, 'end': 442}, {'entity': 'B-ORG', 'score': 0.5038258, 'index': 118, 'word': 'egypt', 'start': 496, 'end': 501}, {'entity': 'I-ORG', 'score': 0.64455724, 'index': 119, 'word': '##ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.6576205, 'index': 120, 'word': 'pha', 'start': 505, 'end': 508}, {'entity': 'I-ORG', 'score': 0.6960956, 'index': 121, 'word': '##rao', 'start': 508, 'end': 511}, {'entity': 'I-ORG', 'score': 0.61072016, 'index': 122, 'word': '##h', 'start': 511, 'end': 512}, {'entity': 'B-ORG', 'score': 0.7676731, 'index': 190, 'word': 'nasa', 'start': 801, 'end': 805}, {'entity': 'B-LOC', 'score': 0.37248164, 'index': 201, 'word': 'mars', 'start': 843, 'end': 847}, {'entity': 'I-ORG', 'score': 0.6403719, 'index': 211, 'word': 'mars', 'start': 875, 'end': 879}, {'entity': 'B-ORG', 'score': 0.6982709, 'index': 233, 
'word': 'haunted', 'start': 972, 'end': 979}, {'entity': 'I-ORG', 'score': 0.8855793, 'index': 234, 'word': 'gr', 'start': 980, 'end': 982}, {'entity': 'I-ORG', 'score': 0.89122075, 'index': 235, 'word': '##oce', 'start': 982, 'end': 985}, {'entity': 'I-ORG', 'score': 0.91083044, 'index': 236, 'word': '##ry', 'start': 985, 'end': 987}, {'entity': 'I-ORG', 'score': 0.78272146, 'index': 237, 'word': 'store', 'start': 988, 'end': 993}, {'entity': 'I-ORG', 'score': 0.39987472, 'index': 257, 'word': 'mars', 'start': 1088, 'end': 1092}, {'entity': 'B-ORG', 'score': 0.37609065, 'index': 272, 'word': 'defenders', 'start': 1152, 'end': 1161}, {'entity': 'I-ORG', 'score': 0.49420744, 'index': 273, 'word': 'of', 'start': 1162, 'end': 1164}, {'entity': 'I-ORG', 'score': 0.65821886, 'index': 274, 'word': 'the', 'start': 1165, 'end': 1168}, {'entity': 'I-ORG', 'score': 0.626755, 'index': 275, 'word': 'nasa', 'start': 1169, 'end': 1173}, {'entity': 'I-ORG', 'score': 0.6965647, 'index': 281, 'word': 'civilization', 'start': 1204, 'end': 1216}, {'entity': 'I-ORG', 'score': 0.48566574, 'index': 282, 'word': 'on', 'start': 1217, 'end': 1219}, {'entity': 'I-ORG', 'score': 0.70331687, 'index': 283, 'word': 'mars', 'start': 1220, 'end': 1224}, {'entity': 'B-PER', 'score': 0.9669894, 'index': 308, 'word': 'michael', 'start': 1312, 'end': 1319}, {'entity': 'I-PER', 'score': 0.94802105, 'index': 309, 'word': 'mali', 'start': 1320, 'end': 1324}, {'entity': 'I-PER', 'score': 0.9340164, 'index': 310, 'word': '##n', 'start': 1324, 'end': 1325}, {'entity': 'B-ORG', 'score': 0.8970071, 'index': 313, 'word': 'mars', 'start': 1334, 'end': 1338}, {'entity': 'I-ORG', 'score': 0.93871564, 'index': 314, 'word': 'orbite', 'start': 1339, 'end': 1345}, {'entity': 'I-ORG', 'score': 0.9164465, 'index': 315, 'word': '##r', 'start': 1345, 'end': 1346}, {'entity': 'I-ORG', 'score': 0.7809999, 'index': 316, 'word': 'camera', 'start': 1347, 'end': 1353}, {'entity': 'B-ORG', 'score': 0.4608157, 'index': 330, 
'word': 'viking', 'start': 1419, 'end': 1425}, {'entity': 'B-ORG', 'score': 0.6714569, 'index': 420, 'word': 'absolute', 'start': 1803, 'end': 1811}, {'entity': 'I-ORG', 'score': 0.76191556, 'index': 421, 'word': 'maximum', 'start': 1812, 'end': 1819}, {'entity': 'I-ORG', 'score': 0.67246604, 'index': 422, 'word': 'revolution', 'start': 1820, 'end': 1830}, {'entity': 'B-LOC', 'score': 0.6010912, 'index': 468, 'word': 'butte', 'start': 2041, 'end': 2046}, {'entity': 'I-ORG', 'score': 0.46326715, 'index': 469, 'word': 'or', 'start': 2047, 'end': 2049}, {'entity': 'I-ORG', 'score': 0.45148414, 'index': 470, 'word': 'mesa', 'start': 2050, 'end': 2054}, {'entity': 'B-LOC', 'score': 0.8956182, 'index': 480, 'word': 'american', 'start': 2094, 'end': 2102}, {'entity': 'I-LOC', 'score': 0.9263237, 'index': 481, 'word': 'west', 'start': 2103, 'end': 2107}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"GEOcite/AuthorParserModel\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"GEOcite/AuthorParserModel\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "b09510d4-fdb1-44a8-aa54-34c809af5aee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 5\n",
"B-ORG 9\n",
"B-PER 1\n",
"I-LOC 3\n",
"I-ORG 29\n",
"I-PER 2\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC american 1\n",
" butte 1\n",
" c 1\n",
" mars 1\n",
" marti 1\n",
"B-ORG absolute 1\n",
" defenders 1\n",
" egypt 1\n",
" haunted 1\n",
" mars 1\n",
" nasa 2\n",
" viking 2\n",
"B-PER michael 1\n",
"I-LOC ##onia 1\n",
" ##yd 1\n",
" west 1\n",
"I-ORG ##an 1\n",
" ##h 1\n",
" ##ion 1\n",
" ##oce 1\n",
" ##r 1\n",
" ##rao 1\n",
" ##ry 1\n",
" 1 1\n",
" camera 1\n",
" civilization 1\n",
" gr 1\n",
" mars 5\n",
" maximum 1\n",
" mesa 2\n",
" nasa 1\n",
" of 1\n",
" on 1\n",
" or 1\n",
" orbite 1\n",
" pha 1\n",
" revolution 1\n",
" spacecraft 1\n",
" store 1\n",
" the 1\n",
"I-PER ##n 1\n",
" mali 1\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"18 GEOciteAuthorParserModel.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "116981e0-56cb-4094-99e4-e8b99f62597d",
"metadata": {},
"source": [
"## 19 mbruton/spa_en_XLM-R"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7220081f-0732-4693-8a92-7b93d17b259a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'r0:arg1|tem', 'score': 0.9885584, 'index': 4, 'word': '▁you', 'start': 6, 'end': 10}, {'entity': 'r0:root', 'score': 0.9828293, 'index': 5, 'word': \"'\", 'start': 10, 'end': 11}, {'entity': 'r0:root', 'score': 0.9987388, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'r0:arg2|atr', 'score': 0.9798707, 'index': 9, 'word': '▁scientist', 'start': 20, 'end': 30}, {'entity': 'r1:arg1|tem', 'score': 0.9950965, 'index': 11, 'word': '▁you', 'start': 31, 'end': 35}, {'entity': 'r1:root', 'score': 0.9981749, 'index': 13, 'word': '▁be', 'start': 42, 'end': 45}, {'entity': 'r1:arg2|atr', 'score': 0.98995215, 'index': 14, 'word': '▁able', 'start': 45, 'end': 50}, {'entity': 'r2:root', 'score': 0.99103266, 'index': 16, 'word': '▁tell', 'start': 53, 'end': 58}, {'entity': 'r2:arg2|ben', 'score': 0.9612953, 'index': 17, 'word': '▁me', 'start': 58, 'end': 61}, {'entity': 'r2:arg1|pat', 'score': 0.98190963, 'index': 20, 'word': '▁story', 'start': 71, 'end': 77}, {'entity': 'r3:arg1|tem', 'score': 0.98912394, 'index': 27, 'word': '▁which', 'start': 101, 'end': 107}, {'entity': 'r3:argM|adv', 'score': 0.60085195, 'index': 28, 'word': '▁obviously', 'start': 107, 'end': 117}, {'entity': 'r3:root', 'score': 0.9985306, 'index': 29, 'word': '▁is', 'start': 117, 'end': 120}, {'entity': 'r3:arg2|atr', 'score': 0.9856076, 'index': 30, 'word': '▁evidence', 'start': 120, 'end': 129}, {'entity': 'r4:root', 'score': 0.55564994, 'index': 32, 'word': '▁there', 'start': 134, 'end': 140}, {'entity': 'r4:root', 'score': 0.9971462, 'index': 33, 'word': '▁is', 'start': 140, 'end': 143}, {'entity': 'r4:arg1|tem', 'score': 0.94116557, 'index': 34, 'word': '▁life', 'start': 143, 'end': 148}, {'entity': 'r4:argM|loc', 'score': 0.54012036, 'index': 35, 'word': '▁on', 'start': 148, 'end': 151}, {'entity': 'r6:arg1|pat', 'score': 0.69593036, 'index': 41, 'word': '▁face', 'start': 170, 'end': 175}, {'entity': 'r6:root', 'score': 0.69639283, 'index': 43, 'word': '▁created', 'start': 
179, 'end': 187}, {'entity': 'r6:arg0|agt', 'score': 0.64183134, 'index': 44, 'word': '▁by', 'start': 187, 'end': 190}, {'entity': 'r7:arg0|agt', 'score': 0.55535495, 'index': 61, 'word': 'craft', 'start': 254, 'end': 259}, {'entity': 'r8:root', 'score': 0.47476873, 'index': 63, 'word': '▁circ', 'start': 263, 'end': 268}, {'entity': 'r8:root', 'score': 0.47575742, 'index': 64, 'word': 'ling', 'start': 268, 'end': 272}, {'entity': 'r8:arg1|pat', 'score': 0.31078094, 'index': 66, 'word': '▁planet', 'start': 276, 'end': 283}, {'entity': 'r8:root', 'score': 0.31837395, 'index': 68, 'word': '▁sna', 'start': 284, 'end': 288}, {'entity': 'r9:root', 'score': 0.31875014, 'index': 69, 'word': 'pping', 'start': 288, 'end': 293}, {'entity': 'r8:arg1|pat', 'score': 0.286856, 'index': 70, 'word': '▁photos', 'start': 293, 'end': 300}, {'entity': 'r8:arg0|agt', 'score': 0.21294405, 'index': 73, 'word': '▁it', 'start': 306, 'end': 309}, {'entity': 'r9:root', 'score': 0.3105045, 'index': 74, 'word': '▁spot', 'start': 309, 'end': 314}, {'entity': 'r9:root', 'score': 0.31755808, 'index': 75, 'word': 'ted', 'start': 314, 'end': 317}, {'entity': 'r9:arg1|pat', 'score': 0.22570874, 'index': 79, 'word': '▁like', 'start': 329, 'end': 334}, {'entity': 'r9:arg1|pat', 'score': 0.2306108, 'index': 80, 'word': 'ness', 'start': 334, 'end': 338}, {'entity': 'r9:arg0|agt', 'score': 0.19357839, 'index': 87, 'word': '▁scientist', 'start': 358, 'end': 368}, {'entity': 'r9:root', 'score': 0.2762144, 'index': 89, 'word': '▁figure', 'start': 369, 'end': 376}, {'entity': 'r9:root', 'score': 0.22951078, 'index': 90, 'word': 'd', 'start': 376, 'end': 377}, {'entity': 'r8:arg1|tem', 'score': 0.078046024, 'index': 93, 'word': '▁it', 'start': 386, 'end': 389}, {'entity': 'r9:root', 'score': 0.15313852, 'index': 94, 'word': '▁was', 'start': 389, 'end': 393}, {'entity': 'r9:arg1|pat', 'score': 0.045638904, 'index': 99, 'word': '▁mesa', 'start': 414, 'end': 419}, {'entity': 'r9:arg1|tem', 'score': 0.058356848, 
'index': 109, 'word': '▁one', 'start': 453, 'end': 457}, {'entity': 'r9:root', 'score': 0.1325599, 'index': 110, 'word': '▁had', 'start': 457, 'end': 461}, {'entity': 'r9:arg1|pat', 'score': 0.06691175, 'index': 111, 'word': '▁shadow', 'start': 461, 'end': 468}, {'entity': 'r9:arg0|agt', 'score': 0.06208066, 'index': 113, 'word': '▁that', 'start': 469, 'end': 474}, {'entity': 'r9:arg1|pat', 'score': 0.0632269, 'index': 115, 'word': '▁it', 'start': 479, 'end': 482}, {'entity': 'r9:root', 'score': 0.099622406, 'index': 116, 'word': '▁look', 'start': 482, 'end': 487}, {'entity': 'r9:arg1|pat', 'score': 0.034427807, 'index': 117, 'word': '▁like', 'start': 487, 'end': 492}, {'entity': 'r8:argM|tmp', 'score': 0.06514092, 'index': 128, 'word': '▁later', 'start': 527, 'end': 533}, {'entity': 'r9:arg0|agt', 'score': 0.09965178, 'index': 130, 'word': '▁we', 'start': 534, 'end': 537}, {'entity': 'r11:root', 'score': 0.08552331, 'index': 131, 'word': '▁reveal', 'start': 537, 'end': 544}, {'entity': 'r11:root', 'score': 0.07387665, 'index': 132, 'word': 'ed', 'start': 544, 'end': 546}, {'entity': 'r9:arg1|pat', 'score': 0.12956654, 'index': 134, 'word': '▁image', 'start': 550, 'end': 556}, {'entity': 'r8:argM|adv', 'score': 0.038444836, 'index': 135, 'word': '▁for', 'start': 556, 'end': 560}, {'entity': 'r9:arg0|agt', 'score': 0.047709465, 'index': 141, 'word': '▁we', 'start': 576, 'end': 579}, {'entity': 'r10:root', 'score': 0.077698395, 'index': 142, 'word': '▁made', 'start': 579, 'end': 584}, {'entity': 'r9:arg1|pat', 'score': 0.04974987, 'index': 147, 'word': '▁it', 'start': 602, 'end': 605}, {'entity': 'r11:root', 'score': 0.073394544, 'index': 148, 'word': '▁was', 'start': 605, 'end': 609}, {'entity': 'r9:arg1|pat', 'score': 0.04125579, 'index': 151, 'word': '▁rock', 'start': 616, 'end': 621}, {'entity': 'r9:arg1|pat', 'score': 0.039437402, 'index': 152, 'word': '▁formation', 'start': 621, 'end': 631}, {'entity': 'r8:arg1|tem', 'score': 0.044781037, 'index': 153, 'word': 
'▁that', 'start': 631, 'end': 636}, {'entity': 'r8:argM|adv', 'score': 0.039434977, 'index': 154, 'word': '▁just', 'start': 636, 'end': 641}, {'entity': 'r11:root', 'score': 0.0661228, 'index': 155, 'word': '▁rese', 'start': 641, 'end': 646}, {'entity': 'r11:root', 'score': 0.0739401, 'index': 156, 'word': 'mble', 'start': 646, 'end': 650}, {'entity': 'r11:root', 'score': 0.06486134, 'index': 157, 'word': 'd', 'start': 650, 'end': 651}, {'entity': 'r9:arg1|pat', 'score': 0.046511702, 'index': 160, 'word': '▁head', 'start': 659, 'end': 664}, {'entity': 'r11:root', 'score': 0.0790022, 'index': 169, 'word': '▁for', 'start': 692, 'end': 696}, {'entity': 'r10:root', 'score': 0.077606075, 'index': 170, 'word': 'med', 'start': 696, 'end': 699}, {'entity': 'r9:arg0|agt', 'score': 0.040013608, 'index': 171, 'word': '▁by', 'start': 699, 'end': 702}, {'entity': 'r9:arg0|agt', 'score': 0.08678443, 'index': 175, 'word': '▁We', 'start': 711, 'end': 714}, {'entity': 'r9:root', 'score': 0.10425882, 'index': 177, 'word': '▁announced', 'start': 719, 'end': 729}, {'entity': 'r9:arg1|pat', 'score': 0.13532344, 'index': 178, 'word': '▁it', 'start': 729, 'end': 732}, {'entity': 'r9:arg0|agt', 'score': 0.03753009, 'index': 180, 'word': '▁we', 'start': 740, 'end': 743}, {'entity': 'r9:root', 'score': 0.07860999, 'index': 181, 'word': '▁thought', 'start': 743, 'end': 751}, {'entity': 'r9:arg1|pat', 'score': 0.058518294, 'index': 182, 'word': '▁it', 'start': 751, 'end': 754}, {'entity': 'r9:root', 'score': 0.15334934, 'index': 184, 'word': '▁be', 'start': 760, 'end': 763}, {'entity': 'r8:arg2|atr', 'score': 0.077581845, 'index': 187, 'word': '▁way', 'start': 770, 'end': 774}, {'entity': 'r9:root', 'score': 0.1817208, 'index': 189, 'word': '▁engage', 'start': 777, 'end': 784}, {'entity': 'r8:arg1|tem', 'score': 0.06929828, 'index': 191, 'word': '▁public', 'start': 788, 'end': 795}, {'entity': 'r8:arg1|tem', 'score': 0.07084324, 'index': 209, 'word': '▁it', 'start': 853, 'end': 856}, 
{'entity': 'r9:root', 'score': 0.097537614, 'index': 210, 'word': '▁did', 'start': 856, 'end': 860}, {'entity': 'r8:arg1|tem', 'score': 0.22996224, 'index': 213, 'word': '▁face', 'start': 866, 'end': 871}, {'entity': 'r8:argM|tmp', 'score': 0.15224062, 'index': 216, 'word': '▁soon', 'start': 879, 'end': 884}, {'entity': 'r9:root', 'score': 0.14052552, 'index': 217, 'word': '▁became', 'start': 884, 'end': 891}, {'entity': 'r7:arg2|atr', 'score': 0.13227668, 'index': 220, 'word': '▁icon', 'start': 897, 'end': 902}, {'entity': 'r2:root', 'score': 0.34378532, 'index': 222, 'word': '▁shot', 'start': 903, 'end': 908}, {'entity': 'r4:argM|loc', 'score': 0.28516474, 'index': 223, 'word': '▁in', 'start': 908, 'end': 911}, {'entity': 'r3:root', 'score': 0.45903492, 'index': 226, 'word': '▁appeared', 'start': 919, 'end': 928}, {'entity': 'r4:arg2|loc', 'score': 0.18714885, 'index': 227, 'word': '▁in', 'start': 928, 'end': 931}, {'entity': 'r3:argM|tmp', 'score': 0.3332829, 'index': 248, 'word': '▁for', 'start': 1008, 'end': 1012}, {'entity': 'r4:arg0|agt', 'score': 0.4323846, 'index': 253, 'word': '▁people', 'start': 1027, 'end': 1034}, {'entity': 'r4:root', 'score': 0.6460269, 'index': 254, 'word': '▁thought', 'start': 1034, 'end': 1042}, {'entity': 'r5:arg1|tem', 'score': 0.7946849, 'index': 257, 'word': '▁land', 'start': 1054, 'end': 1059}, {'entity': 'r5:arg1|tem', 'score': 0.7655751, 'index': 258, 'word': 'form', 'start': 1059, 'end': 1063}, {'entity': 'r5:root', 'score': 0.87633103, 'index': 259, 'word': '▁was', 'start': 1063, 'end': 1067}, {'entity': 'r5:arg2|atr', 'score': 0.82787794, 'index': 260, 'word': '▁evidence', 'start': 1067, 'end': 1076}, {'entity': 'r5:arg0|agt', 'score': 0.4799223, 'index': 269, 'word': '▁scientist', 'start': 1105, 'end': 1115}, {'entity': 'r5:root', 'score': 0.8739596, 'index': 271, 'word': '▁wanted', 'start': 1116, 'end': 1123}, {'entity': 'r5:root', 'score': 0.4174833, 'index': 273, 'word': '▁hi', 'start': 1126, 'end': 1129}, {'entity': 
'r6:arg1|pat', 'score': 0.72568583, 'index': 275, 'word': '▁it', 'start': 1131, 'end': 1134}, {'entity': 'r5:argM|adv', 'score': 0.48375428, 'index': 278, 'word': '▁really', 'start': 1139, 'end': 1146}, {'entity': 'r5:arg0|agt', 'score': 0.5601293, 'index': 281, 'word': '▁defender', 'start': 1151, 'end': 1160}, {'entity': 'r4:root', 'score': 0.5749663, 'index': 287, 'word': '▁wish', 'start': 1180, 'end': 1185}, {'entity': 'r5:root', 'score': 0.5627814, 'index': 289, 'word': '▁was', 'start': 1191, 'end': 1195}, {'entity': 'r5:arg1|tem', 'score': 0.7166488, 'index': 292, 'word': '▁civiliza', 'start': 1203, 'end': 1212}, {'entity': 'r5:arg1|tem', 'score': 0.45804867, 'index': 293, 'word': 'tion', 'start': 1212, 'end': 1216}, {'entity': 'r5:arg0|agt', 'score': 0.82322997, 'index': 297, 'word': '▁We', 'start': 1225, 'end': 1228}, {'entity': 'r5:root', 'score': 0.84057826, 'index': 298, 'word': '▁decided', 'start': 1228, 'end': 1236}, {'entity': 'r5:root', 'score': 0.49023774, 'index': 300, 'word': '▁take', 'start': 1239, 'end': 1244}, {'entity': 'r6:arg1|pat', 'score': 0.4983117, 'index': 302, 'word': '▁shot', 'start': 1252, 'end': 1257}, {'entity': 'r6:arg1|tem', 'score': 0.7063215, 'index': 307, 'word': '▁we', 'start': 1275, 'end': 1278}, {'entity': 'r6:root', 'score': 0.58319706, 'index': 308, 'word': '▁were', 'start': 1278, 'end': 1283}, {'entity': 'r6:arg2|atr', 'score': 0.6607443, 'index': 312, 'word': '▁wrong', 'start': 1286, 'end': 1292}, {'entity': 'r6:argM|tmp', 'score': 0.4239783, 'index': 314, 'word': '▁on', 'start': 1293, 'end': 1296}, {'entity': 'r7:arg0|agt', 'score': 0.61083174, 'index': 319, 'word': '▁Michael', 'start': 1311, 'end': 1319}, {'entity': 'r7:arg0|agt', 'score': 0.43220523, 'index': 320, 'word': '▁Malin', 'start': 1319, 'end': 1325}, {'entity': 'r7:root', 'score': 0.568414, 'index': 329, 'word': '▁took', 'start': 1358, 'end': 1363}, {'entity': 'r6:arg1|pat', 'score': 0.70329344, 'index': 331, 'word': '▁picture', 'start': 1365, 'end': 1373}, 
{'entity': 'r7:arg1|tem', 'score': 0.4258404, 'index': 332, 'word': '▁that', 'start': 1373, 'end': 1378}, {'entity': 'r7:root', 'score': 0.72852856, 'index': 333, 'word': '▁was', 'start': 1378, 'end': 1382}, {'entity': 'r7:arg2|atr', 'score': 0.4354887, 'index': 336, 'word': '▁sharp', 'start': 1392, 'end': 1398}, {'entity': 'r7:arg2|atr', 'score': 0.3854383, 'index': 337, 'word': 'er', 'start': 1398, 'end': 1400}, {'entity': 'r8:root', 'score': 0.4831441, 'index': 344, 'word': '▁reveal', 'start': 1433, 'end': 1440}, {'entity': 'r8:root', 'score': 0.3593413, 'index': 345, 'word': 'ing', 'start': 1440, 'end': 1443}, {'entity': 'r9:arg1|pat', 'score': 0.2637543, 'index': 348, 'word': '▁land', 'start': 1453, 'end': 1458}, {'entity': 'r8:arg1|tem', 'score': 0.20474246, 'index': 351, 'word': '▁which', 'start': 1463, 'end': 1469}, {'entity': 'r9:root', 'score': 0.3001293, 'index': 352, 'word': '▁meant', 'start': 1469, 'end': 1475}, {'entity': 'r9:arg1|pat', 'score': 0.060182273, 'index': 355, 'word': '▁monument', 'start': 1484, 'end': 1493}, {'entity': 'r8:arg1|tem', 'score': 0.17025372, 'index': 360, 'word': '▁picture', 'start': 1504, 'end': 1512}, {'entity': 'r9:root', 'score': 0.30658206, 'index': 361, 'word': '▁wasn', 'start': 1512, 'end': 1517}, {'entity': 'r8:arg2|atr', 'score': 0.1777854, 'index': 365, 'word': '▁clear', 'start': 1524, 'end': 1530}, {'entity': 'r9:arg0|agt', 'score': 0.04626326, 'index': 369, 'word': '▁which', 'start': 1538, 'end': 1544}, {'entity': 'r9:root', 'score': 0.13540097, 'index': 371, 'word': '▁mean', 'start': 1550, 'end': 1555}, {'entity': 'r9:arg1|pat', 'score': 0.12729116, 'index': 373, 'word': '▁mark', 'start': 1561, 'end': 1566}, {'entity': 'r9:arg1|pat', 'score': 0.11717077, 'index': 374, 'word': 'ings', 'start': 1566, 'end': 1570}, {'entity': 'r9:root', 'score': 0.15416586, 'index': 376, 'word': '▁hidden', 'start': 1575, 'end': 1582}, {'entity': 'r9:arg0|agt', 'score': 0.0479266, 'index': 377, 'word': '▁by', 'start': 1582, 'end': 
1585}, {'entity': 'r8:arg1|tem', 'score': 0.06482706, 'index': 386, 'word': '▁rumor', 'start': 1609, 'end': 1615}, {'entity': 'r9:root', 'score': 0.09473133, 'index': 387, 'word': '▁started', 'start': 1615, 'end': 1623}, {'entity': 'r9:root', 'score': 0.08225819, 'index': 391, 'word': '▁prove', 'start': 1631, 'end': 1637}, {'entity': 'r9:arg1|pat', 'score': 0.11032233, 'index': 392, 'word': '▁them', 'start': 1637, 'end': 1642}, {'entity': 'r9:arg1|pat', 'score': 0.039644323, 'index': 393, 'word': '▁wrong', 'start': 1642, 'end': 1648}, {'entity': 'r8:argM|tmp', 'score': 0.058614645, 'index': 394, 'word': '▁on', 'start': 1648, 'end': 1651}, {'entity': 'r9:arg0|agt', 'score': 0.09158646, 'index': 399, 'word': '▁we', 'start': 1665, 'end': 1668}, {'entity': 'r9:root', 'score': 0.097704664, 'index': 400, 'word': '▁decided', 'start': 1668, 'end': 1676}, {'entity': 'r9:root', 'score': 0.06539654, 'index': 402, 'word': '▁take', 'start': 1679, 'end': 1684}, {'entity': 'r9:arg1|pat', 'score': 0.103520945, 'index': 404, 'word': '▁picture', 'start': 1692, 'end': 1700}, {'entity': 'r9:arg1|pat', 'score': 0.051061057, 'index': 408, 'word': '▁it', 'start': 1713, 'end': 1716}, {'entity': 'r10:root', 'score': 0.08168564, 'index': 409, 'word': '▁was', 'start': 1716, 'end': 1720}, {'entity': 'r9:arg1|pat', 'score': 0.03991396, 'index': 414, 'word': '▁day', 'start': 1739, 'end': 1743}, {'entity': 'r9:arg0|agt', 'score': 0.07429483, 'index': 416, 'word': '▁Malin', 'start': 1744, 'end': 1750}, {'entity': 'r9:arg0|agt', 'score': 0.0536705, 'index': 419, 'word': '▁team', 'start': 1752, 'end': 1757}, {'entity': 'r10:root', 'score': 0.08358262, 'index': 420, 'word': '▁capture', 'start': 1757, 'end': 1765}, {'entity': 'r10:root', 'score': 0.079094924, 'index': 421, 'word': 'd', 'start': 1765, 'end': 1766}, {'entity': 'r9:arg1|pat', 'score': 0.11371465, 'index': 424, 'word': '▁photo', 'start': 1777, 'end': 1783}, {'entity': 'r11:root', 'score': 0.0549004, 'index': 425, 'word': '▁using', 
'start': 1783, 'end': 1789}, {'entity': 'r9:arg1|pat', 'score': 0.062476702, 'index': 427, 'word': '▁camera', 'start': 1793, 'end': 1800}, {'entity': 'r9:arg1|pat', 'score': 0.023913436, 'index': 434, 'word': '▁With', 'start': 1831, 'end': 1836}, {'entity': 'r9:arg0|agt', 'score': 0.04540858, 'index': 437, 'word': '▁you', 'start': 1848, 'end': 1852}, {'entity': 'r9:root', 'score': 0.1428813, 'index': 439, 'word': '▁discern', 'start': 1856, 'end': 1864}, {'entity': 'r9:arg1|pat', 'score': 0.12133165, 'index': 440, 'word': '▁things', 'start': 1864, 'end': 1871}, {'entity': 'r8:argM|loc', 'score': 0.03498705, 'index': 441, 'word': '▁in', 'start': 1871, 'end': 1874}, {'entity': 'r9:arg1|pat', 'score': 0.03346169, 'index': 453, 'word': '▁which', 'start': 1926, 'end': 1932}, {'entity': 'r9:root', 'score': 0.12779085, 'index': 454, 'word': '▁means', 'start': 1932, 'end': 1938}, {'entity': 'r9:root', 'score': 0.057673983, 'index': 456, 'word': '▁there', 'start': 1941, 'end': 1947}, {'entity': 'r9:root', 'score': 0.14751895, 'index': 457, 'word': '▁were', 'start': 1947, 'end': 1952}, {'entity': 'r9:arg1|pat', 'score': 0.07470571, 'index': 459, 'word': '▁sign', 'start': 1956, 'end': 1961}, {'entity': 'r9:arg0|agt', 'score': 0.054813575, 'index': 464, 'word': '▁you', 'start': 1971, 'end': 1975}, {'entity': 'r8:argM|adv', 'score': 0.036606755, 'index': 466, 'word': '▁easily', 'start': 1981, 'end': 1988}, {'entity': 'r9:root', 'score': 0.17583221, 'index': 467, 'word': '▁see', 'start': 1988, 'end': 1992}, {'entity': 'r9:arg2|atr', 'score': 0.052657988, 'index': 468, 'word': '▁what', 'start': 1992, 'end': 1997}, {'entity': 'r8:arg1|tem', 'score': 0.08003322, 'index': 469, 'word': '▁they', 'start': 1997, 'end': 2002}, {'entity': 'r9:root', 'score': 0.13172248, 'index': 470, 'word': '▁were', 'start': 2002, 'end': 2007}, {'entity': 'r9:arg1|pat', 'score': 0.10661528, 'index': 472, 'word': '▁What', 'start': 2008, 'end': 2013}, {'entity': 'r9:arg0|agt', 'score': 0.059802476, 'index': 
474, 'word': '▁picture', 'start': 2017, 'end': 2025}, {'entity': 'r9:root', 'score': 0.14850138, 'index': 475, 'word': '▁showed', 'start': 2025, 'end': 2032}, {'entity': 'r9:root', 'score': 0.23177616, 'index': 476, 'word': '▁was', 'start': 2032, 'end': 2036}, {'entity': 'r8:arg2|atr', 'score': 0.21636702, 'index': 478, 'word': '▁but', 'start': 2040, 'end': 2044}, {'entity': 'r7:arg2|atr', 'score': 0.2325799, 'index': 479, 'word': 'te', 'start': 2044, 'end': 2046}, {'entity': 'r7:arg1|tem', 'score': 0.26516578, 'index': 483, 'word': '▁which', 'start': 2055, 'end': 2061}, {'entity': 'r7:root', 'score': 0.20337786, 'index': 484, 'word': '▁are', 'start': 2061, 'end': 2065}, {'entity': 'r7:arg2|atr', 'score': 0.2973422, 'index': 485, 'word': '▁land', 'start': 2065, 'end': 2070}, {'entity': 'r7:arg2|atr', 'score': 0.31532818, 'index': 486, 'word': 'form', 'start': 2070, 'end': 2074}, {'entity': 'r7:arg2|atr', 'score': 0.2744848, 'index': 487, 'word': 's', 'start': 2074, 'end': 2075}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_en_XLM-R\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_en_XLM-R\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "4727c78d-e205-48e0-b30a-3b475a3060c7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"entity word \n",
"r0:arg1|tem ▁you 1\n",
"r0:arg2|atr ▁scientist 1\n",
"r0:root 1\n",
" re 1\n",
"r10:root d 1\n",
" ..\n",
"r9:root ▁there 1\n",
" ▁thought 1\n",
" ▁was 2\n",
" ▁wasn 1\n",
" ▁were 2\n",
"Length: 168, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"19 mbrutonspa_en_XLM-R.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"#print(aux.groupby(['entity']).size())\n",
"aux2 = aux.groupby(['entity', 'word']).size()\n",
"aux2\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "172bc82e-129d-40c1-9f33-0a80eaa9237d",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None) "
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "c4c6dff8-ce18-47e3-8427-e74ca7f4307f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"entity word \n",
"r0:arg1|tem ▁you 1\n",
"r0:arg2|atr ▁scientist 1\n",
"r0:root 1\n",
" re 1\n",
"r10:root d 1\n",
" med 1\n",
" ▁capture 1\n",
" ▁made 1\n",
" ▁was 1\n",
"r11:root d 1\n",
" ed 1\n",
" mble 1\n",
" ▁for 1\n",
" ▁rese 1\n",
" ▁reveal 1\n",
" ▁using 1\n",
" ▁was 1\n",
"r1:arg1|tem ▁you 1\n",
"r1:arg2|atr ▁able 1\n",
"r1:root ▁be 1\n",
"r2:arg1|pat ▁story 1\n",
"r2:arg2|ben ▁me 1\n",
"r2:root ▁shot 1\n",
" ▁tell 1\n",
"r3:arg1|tem ▁which 1\n",
"r3:arg2|atr ▁evidence 1\n",
"r3:argM|adv ▁obviously 1\n",
"r3:argM|tmp ▁for 1\n",
"r3:root ▁appeared 1\n",
" ▁is 1\n",
"r4:arg0|agt ▁people 1\n",
"r4:arg1|tem ▁life 1\n",
"r4:arg2|loc ▁in 1\n",
"r4:argM|loc ▁in 1\n",
" ▁on 1\n",
"r4:root ▁is 1\n",
" ▁there 1\n",
" ▁thought 1\n",
" ▁wish 1\n",
"r5:arg0|agt ▁We 1\n",
" ▁defender 1\n",
" ▁scientist 1\n",
"r5:arg1|tem form 1\n",
" tion 1\n",
" ▁civiliza 1\n",
" ▁land 1\n",
"r5:arg2|atr ▁evidence 1\n",
"r5:argM|adv ▁really 1\n",
"r5:root ▁decided 1\n",
" ▁hi 1\n",
" ▁take 1\n",
" ▁wanted 1\n",
" ▁was 2\n",
"r6:arg0|agt ▁by 1\n",
"r6:arg1|pat ▁face 1\n",
" ▁it 1\n",
" ▁picture 1\n",
" ▁shot 1\n",
"r6:arg1|tem ▁we 1\n",
"r6:arg2|atr ▁wrong 1\n",
"r6:argM|tmp ▁on 1\n",
"r6:root ▁created 1\n",
" ▁were 1\n",
"r7:arg0|agt craft 1\n",
" ▁Malin 1\n",
" ▁Michael 1\n",
"r7:arg1|tem ▁that 1\n",
" ▁which 1\n",
"r7:arg2|atr er 1\n",
" form 1\n",
" s 1\n",
" te 1\n",
" ▁icon 1\n",
" ▁land 1\n",
" ▁sharp 1\n",
"r7:root ▁are 1\n",
" ▁took 1\n",
" ▁was 1\n",
"r8:arg0|agt ▁it 1\n",
"r8:arg1|pat ▁photos 1\n",
" ▁planet 1\n",
"r8:arg1|tem ▁face 1\n",
" ▁it 2\n",
" ▁picture 1\n",
" ▁public 1\n",
" ▁rumor 1\n",
" ▁that 1\n",
" ▁they 1\n",
" ▁which 1\n",
"r8:arg2|atr ▁but 1\n",
" ▁clear 1\n",
" ▁way 1\n",
"r8:argM|adv ▁easily 1\n",
" ▁for 1\n",
" ▁just 1\n",
"r8:argM|loc ▁in 1\n",
"r8:argM|tmp ▁later 1\n",
" ▁on 1\n",
" ▁soon 1\n",
"r8:root ing 1\n",
" ling 1\n",
" ▁circ 1\n",
" ▁reveal 1\n",
" ▁sna 1\n",
"r9:arg0|agt ▁Malin 1\n",
" ▁We 1\n",
" ▁by 2\n",
" ▁picture 1\n",
" ▁scientist 1\n",
" ▁team 1\n",
" ▁that 1\n",
" ▁we 4\n",
" ▁which 1\n",
" ▁you 2\n",
"r9:arg1|pat ings 1\n",
" ness 1\n",
" ▁What 1\n",
" ▁With 1\n",
" ▁camera 1\n",
" ▁day 1\n",
" ▁formation 1\n",
" ▁head 1\n",
" ▁image 1\n",
" ▁it 5\n",
" ▁land 1\n",
" ▁like 2\n",
" ▁mark 1\n",
" ▁mesa 1\n",
" ▁monument 1\n",
" ▁photo 1\n",
" ▁picture 1\n",
" ▁rock 1\n",
" ▁shadow 1\n",
" ▁sign 1\n",
" ▁them 1\n",
" ▁things 1\n",
" ▁which 1\n",
" ▁wrong 1\n",
"r9:arg1|tem ▁one 1\n",
"r9:arg2|atr ▁what 1\n",
"r9:root d 1\n",
" pping 1\n",
" ted 1\n",
" ▁announced 1\n",
" ▁be 1\n",
" ▁became 1\n",
" ▁decided 1\n",
" ▁did 1\n",
" ▁discern 1\n",
" ▁engage 1\n",
" ▁figure 1\n",
" ▁had 1\n",
" ▁hidden 1\n",
" ▁look 1\n",
" ▁mean 1\n",
" ▁means 1\n",
" ▁meant 1\n",
" ▁prove 1\n",
" ▁see 1\n",
" ▁showed 1\n",
" ▁spot 1\n",
" ▁started 1\n",
" ▁take 1\n",
" ▁there 1\n",
" ▁thought 1\n",
" ▁was 2\n",
" ▁wasn 1\n",
" ▁were 2\n",
"dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aux2"
]
},
{
"cell_type": "markdown",
"id": "747b229b-c017-4db6-bdc9-94ef7210508e",
"metadata": {},
"source": [
"## 20 mbruton/gal_enptsp_mBERT"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "61034c1e-ab67-4221-9aac-bdd54263f6da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'r0:root', 'score': 0.69288, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'r1:root', 'score': 0.8305716, 'index': 16, 'word': 'tell', 'start': 54, 'end': 58}, {'entity': 'r2:arg1', 'score': 0.65180844, 'index': 20, 'word': 'story', 'start': 72, 'end': 77}, {'entity': 'r5:root', 'score': 0.4588822, 'index': 44, 'word': 'created', 'start': 180, 'end': 187}, {'entity': 'r6:root', 'score': 0.49542937, 'index': 64, 'word': 'ci', 'start': 264, 'end': 266}, {'entity': 'r6:root', 'score': 0.45653778, 'index': 65, 'word': '##rc', 'start': 266, 'end': 268}, {'entity': 'r6:root', 'score': 0.31713662, 'index': 66, 'word': '##ling', 'start': 268, 'end': 272}, {'entity': 'r6:arg1', 'score': 0.37775016, 'index': 68, 'word': 'planet', 'start': 277, 'end': 283}, {'entity': 'r6:root', 'score': 0.41948485, 'index': 70, 'word': 'sna', 'start': 285, 'end': 288}, {'entity': 'r6:root', 'score': 0.28788853, 'index': 71, 'word': '##pping', 'start': 288, 'end': 293}, {'entity': 'r6:arg1', 'score': 0.3409012, 'index': 72, 'word': 'photos', 'start': 294, 'end': 300}, {'entity': 'r8:root', 'score': 0.29355028, 'index': 76, 'word': 'spotted', 'start': 310, 'end': 317}, {'entity': 'r8:root', 'score': 0.27520663, 'index': 90, 'word': 'figure', 'start': 370, 'end': 376}, {'entity': 'r8:root', 'score': 0.1575869, 'index': 170, 'word': 'formed', 'start': 693, 'end': 699}, {'entity': 'r8:root', 'score': 0.17861637, 'index': 178, 'word': 'announced', 'start': 720, 'end': 729}, {'entity': 'r8:root', 'score': 0.17304042, 'index': 227, 'word': 'appeared', 'start': 920, 'end': 928}, {'entity': 'r5:root', 'score': 0.31188446, 'index': 253, 'word': 'thought', 'start': 1035, 'end': 1042}, {'entity': 'r5:root', 'score': 0.5253225, 'index': 271, 'word': 'hide', 'start': 1127, 'end': 1131}, {'entity': 'r6:arg1', 'score': 0.34368348, 'index': 272, 'word': 'it', 'start': 1132, 'end': 1134}, {'entity': 'r2:root', 'score': 0.6164163, 'index': 284, 'word': 'wish', 'start': 1181, 'end': 
1185}, {'entity': 'r4:root', 'score': 0.31295392, 'index': 296, 'word': 'take', 'start': 1240, 'end': 1244}, {'entity': 'r2:root', 'score': 0.26798648, 'index': 301, 'word': 'make', 'start': 1266, 'end': 1270}, {'entity': 'r8:root', 'score': 0.10455666, 'index': 326, 'word': 'took', 'start': 1359, 'end': 1363}, {'entity': 'r7:arg1', 'score': 0.13289611, 'index': 328, 'word': 'picture', 'start': 1366, 'end': 1373}, {'entity': 'r8:root', 'score': 0.08957504, 'index': 341, 'word': 'reveal', 'start': 1434, 'end': 1440}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"mbruton/gal_enptsp_mBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"mbruton/gal_enptsp_mBERT\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "2430b81e-ba4d-43b3-b644-43f90cb01c1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"r0:root 1\n",
"r1:root 1\n",
"r2:arg1 1\n",
"r2:root 2\n",
"r4:root 1\n",
"r5:root 3\n",
"r6:arg1 3\n",
"r6:root 5\n",
"r7:arg1 1\n",
"r8:root 7\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"r0:root re 1\n",
"r1:root tell 1\n",
"r2:arg1 story 1\n",
"r2:root make 1\n",
" wish 1\n",
"r4:root take 1\n",
"r5:root created 1\n",
" hide 1\n",
" thought 1\n",
"r6:arg1 it 1\n",
" photos 1\n",
" planet 1\n",
"r6:root ##ling 1\n",
" ##pping 1\n",
" ##rc 1\n",
" ci 1\n",
" sna 1\n",
"r7:arg1 picture 1\n",
"r8:root announced 1\n",
" appeared 1\n",
" figure 1\n",
" formed 1\n",
" reveal 1\n",
" spotted 1\n",
" took 1\n",
"dtype: int64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"20 mbrutongal_enptsp_mBERT.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "fddf510a-a810-4cd6-9e03-675e31a660eb",
"metadata": {},
"source": [
"## 21 benjamin/wtp-bert-tiny"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "4e3dff5d-8b2c-4f45-a475-04f7b11e2c41",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'bert-char'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[33], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-tiny\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-tiny\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# Hub id of the checkpoint evaluated in this cell (named once, reused below).\n",
"checkpoint_id = \"benjamin/wtp-bert-tiny\"\n",
"\n",
"try:\n",
"    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n",
"    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n",
"except ValueError as load_error:\n",
"    # The recorded run of this cell failed while loading: Transformers raised\n",
"    # ValueError because the checkpoint's model type (`bert-char`) is not a\n",
"    # registered architecture. Report it instead of aborting Run All.\n",
"    # NOTE(review): WtP checkpoints are presumably meant to be loaded through\n",
"    # the `wtpsplit` package rather than the Auto* classes - confirm.\n",
"    print(f\"Could not load {checkpoint_id}: {load_error}\")\n",
"else:\n",
"    # Run inference only after a successful load, so a stale `salida` left by\n",
"    # an earlier cell is never printed as if it belonged to this checkpoint.\n",
"    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"    salida = classifier(text)\n",
"    print(salida)"
]
},
{
"cell_type": "markdown",
"id": "551d370f-6909-47ab-8f71-6ae001b06067",
"metadata": {},
"source": [
"## 22 benjamin/wtp-canine-s-1l"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "3dd00e55-9d25-4119-b4f0-ff1d64666c52",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[35], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# Hub id of the checkpoint evaluated in this cell (named once, reused below).\n",
"checkpoint_id = \"benjamin/wtp-canine-s-1l\"\n",
"\n",
"try:\n",
"    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n",
"    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n",
"except ValueError as load_error:\n",
"    # The recorded run of this cell failed while loading: Transformers raised\n",
"    # ValueError because the checkpoint's model type (`la-canine`) is not a\n",
"    # registered architecture. Report it instead of aborting Run All.\n",
"    # NOTE(review): WtP checkpoints are presumably meant to be loaded through\n",
"    # the `wtpsplit` package rather than the Auto* classes - confirm.\n",
"    print(f\"Could not load {checkpoint_id}: {load_error}\")\n",
"else:\n",
"    # Run inference only after a successful load, so a stale `salida` left by\n",
"    # an earlier cell is never printed as if it belonged to this checkpoint.\n",
"    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"    salida = classifier(text)\n",
"    print(salida)"
]
},
{
"cell_type": "markdown",
"id": "3f68ad13-321b-443e-8a71-703ee218e1d2",
"metadata": {},
"source": [
"## 23 benjamin/wtp-canine-s-6l"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "ee272836-552e-4335-86b5-6ef64c83294b",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[37], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# Hub id of the checkpoint evaluated in this cell (named once, reused below).\n",
"checkpoint_id = \"benjamin/wtp-canine-s-6l\"\n",
"\n",
"try:\n",
"    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n",
"    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n",
"except ValueError as load_error:\n",
"    # The recorded run of this cell failed while loading: Transformers raised\n",
"    # ValueError because the checkpoint's model type (`la-canine`) is not a\n",
"    # registered architecture. Report it instead of aborting Run All.\n",
"    # NOTE(review): WtP checkpoints are presumably meant to be loaded through\n",
"    # the `wtpsplit` package rather than the Auto* classes - confirm.\n",
"    print(f\"Could not load {checkpoint_id}: {load_error}\")\n",
"else:\n",
"    # Run inference only after a successful load, so a stale `salida` left by\n",
"    # an earlier cell is never printed as if it belonged to this checkpoint.\n",
"    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"    salida = classifier(text)\n",
"    print(salida)"
]
},
{
"cell_type": "markdown",
"id": "d7d5fa36-7761-47b3-a4d1-110ad004db06",
"metadata": {},
"source": [
"## 24 benjamin/wtp-canine-s-9l"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "1759bc6d-9a6a-4466-8226-3b8508170373",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[39], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# Hub id of the checkpoint evaluated in this cell (named once, reused below).\n",
"checkpoint_id = \"benjamin/wtp-canine-s-9l\"\n",
"\n",
"try:\n",
"    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n",
"    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n",
"except ValueError as load_error:\n",
"    # The recorded run of this cell failed while loading: Transformers raised\n",
"    # ValueError because the checkpoint's model type (`la-canine`) is not a\n",
"    # registered architecture. Report it instead of aborting Run All.\n",
"    # NOTE(review): WtP checkpoints are presumably meant to be loaded through\n",
"    # the `wtpsplit` package rather than the Auto* classes - confirm.\n",
"    print(f\"Could not load {checkpoint_id}: {load_error}\")\n",
"else:\n",
"    # Run inference only after a successful load, so a stale `salida` left by\n",
"    # an earlier cell is never printed as if it belonged to this checkpoint.\n",
"    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"    salida = classifier(text)\n",
"    print(salida)"
]
},
{
"cell_type": "markdown",
"id": "c2509820-70e1-49ce-959c-74430c8ace62",
"metadata": {},
"source": [
"## 25 benjamin/wtp-canine-s-1l-no-adapters"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "36801845-c8c2-49b4-9157-3f915be02912",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[41], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"\n",
"# Hub id of the checkpoint evaluated in this cell (named once, reused below).\n",
"checkpoint_id = \"benjamin/wtp-canine-s-1l-no-adapters\"\n",
"\n",
"try:\n",
"    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n",
"    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n",
"except ValueError as load_error:\n",
"    # The recorded run of this cell failed while loading: Transformers raised\n",
"    # ValueError because the checkpoint's model type (`la-canine`) is not a\n",
"    # registered architecture. Report it instead of aborting Run All.\n",
"    # NOTE(review): WtP checkpoints are presumably meant to be loaded through\n",
"    # the `wtpsplit` package rather than the Auto* classes - confirm.\n",
"    print(f\"Could not load {checkpoint_id}: {load_error}\")\n",
"else:\n",
"    # Run inference only after a successful load, so a stale `salida` left by\n",
"    # an earlier cell is never printed as if it belonged to this checkpoint.\n",
"    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"    salida = classifier(text)\n",
"    print(salida)"
]
},
{
"cell_type": "markdown",
"id": "8dc8cf71-eb95-490f-85f8-81a8a77a7471",
"metadata": {},
"source": [
"## 26 benjamin/wtp-canine-s-6l-no-adapters"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "a077b70f-2cb9-4735-a476-b06831039d23",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[44], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-6l-no-adapters\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-6l-no-adapters\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "markdown",
"id": "8f962e5f-b928-4ad4-9c77-2f042e8c53d6",
"metadata": {},
"source": [
"## 27 benjamin/wtp-canine-s-12l-no-adapters"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "bce862a8-9ec5-4a82-bf78-d75851e9bf0e",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[46], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-12l-no-adapters\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-12l-no-adapters\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "markdown",
"id": "23afb232-2a61-45c6-ae7a-d9bef25fb3d3",
"metadata": {},
"source": [
"## 28 Posos/ClinicalNER"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "c11476c7-d694-49a6-bd2b-7e300e81f23d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Posos/ClinicalNER\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"Posos/ClinicalNER\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "markdown",
"id": "4da2e211-d26e-4b3b-b395-6817fef1adba",
"metadata": {},
"source": [
"## 29 numind/NuNER-multilingual-v0.1"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "777410bc-d134-4439-9f3c-8840fa5a532c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at numind/NuNER-multilingual-v0.1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'entity': 'LABEL_1', 'score': 0.6517681, 'index': 1, 'word': 'So', 'start': 0, 'end': 2}, {'entity': 'LABEL_1', 'score': 0.6665356, 'index': 2, 'word': ',', 'start': 2, 'end': 3}, {'entity': 'LABEL_1', 'score': 0.6989242, 'index': 3, 'word': 'if', 'start': 4, 'end': 6}, {'entity': 'LABEL_1', 'score': 0.7015521, 'index': 4, 'word': 'you', 'start': 7, 'end': 10}, {'entity': 'LABEL_1', 'score': 0.69064546, 'index': 5, 'word': \"'\", 'start': 10, 'end': 11}, {'entity': 'LABEL_1', 'score': 0.6504881, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'LABEL_1', 'score': 0.57260615, 'index': 7, 'word': 'a', 'start': 14, 'end': 15}, {'entity': 'LABEL_1', 'score': 0.5917183, 'index': 8, 'word': 'NASA', 'start': 16, 'end': 20}, {'entity': 'LABEL_1', 'score': 0.5965599, 'index': 9, 'word': 'scientist', 'start': 21, 'end': 30}, {'entity': 'LABEL_1', 'score': 0.6810555, 'index': 10, 'word': ',', 'start': 30, 'end': 31}, {'entity': 'LABEL_1', 'score': 0.71639186, 'index': 11, 'word': 'you', 'start': 32, 'end': 35}, {'entity': 'LABEL_1', 'score': 0.70142627, 'index': 12, 'word': 'should', 'start': 36, 'end': 42}, {'entity': 'LABEL_1', 'score': 0.7038532, 'index': 13, 'word': 'be', 'start': 43, 'end': 45}, {'entity': 'LABEL_1', 'score': 0.7008251, 'index': 14, 'word': 'able', 'start': 46, 'end': 50}, {'entity': 'LABEL_1', 'score': 0.66452104, 'index': 15, 'word': 'to', 'start': 51, 'end': 53}, {'entity': 'LABEL_1', 'score': 0.63648593, 'index': 16, 'word': 'tell', 'start': 54, 'end': 58}, {'entity': 'LABEL_1', 'score': 0.6549855, 'index': 17, 'word': 'me', 'start': 59, 'end': 61}, {'entity': 'LABEL_0', 'score': 0.51807356, 'index': 18, 'word': 'the', 'start': 62, 'end': 65}, {'entity': 'LABEL_1', 'score': 0.50653565, 'index': 19, 'word': 'whole', 'start': 66, 'end': 71}, {'entity': 'LABEL_0', 'score': 0.52187973, 'index': 20, 'word': 'story', 'start': 72, 'end': 77}, {'entity': 'LABEL_0', 'score': 0.5346267, 'index': 21, 'word': 'about', 'start': 78, 'end': 83}, 
{'entity': 'LABEL_1', 'score': 0.55174303, 'index': 22, 'word': 'the', 'start': 84, 'end': 87}, {'entity': 'LABEL_1', 'score': 0.60781723, 'index': 23, 'word': 'Face', 'start': 88, 'end': 92}, {'entity': 'LABEL_1', 'score': 0.5665726, 'index': 24, 'word': 'On', 'start': 93, 'end': 95}, {'entity': 'LABEL_1', 'score': 0.5692423, 'index': 25, 'word': 'Mars', 'start': 96, 'end': 100}, {'entity': 'LABEL_1', 'score': 0.6011262, 'index': 26, 'word': ',', 'start': 100, 'end': 101}, {'entity': 'LABEL_1', 'score': 0.5368547, 'index': 27, 'word': 'which', 'start': 102, 'end': 107}, {'entity': 'LABEL_1', 'score': 0.5802307, 'index': 28, 'word': 'obvious', 'start': 108, 'end': 115}, {'entity': 'LABEL_1', 'score': 0.517514, 'index': 29, 'word': '##ly', 'start': 115, 'end': 117}, {'entity': 'LABEL_1', 'score': 0.55036914, 'index': 30, 'word': 'is', 'start': 118, 'end': 120}, {'entity': 'LABEL_1', 'score': 0.5529837, 'index': 31, 'word': 'evidence', 'start': 121, 'end': 129}, {'entity': 'LABEL_0', 'score': 0.5724075, 'index': 32, 'word': 'that', 'start': 130, 'end': 134}, {'entity': 'LABEL_0', 'score': 0.64600146, 'index': 33, 'word': 'there', 'start': 135, 'end': 140}, {'entity': 'LABEL_0', 'score': 0.6608648, 'index': 34, 'word': 'is', 'start': 141, 'end': 143}, {'entity': 'LABEL_0', 'score': 0.6958021, 'index': 35, 'word': 'life', 'start': 144, 'end': 148}, {'entity': 'LABEL_0', 'score': 0.62779915, 'index': 36, 'word': 'on', 'start': 149, 'end': 151}, {'entity': 'LABEL_0', 'score': 0.5481105, 'index': 37, 'word': 'Mars', 'start': 152, 'end': 156}, {'entity': 'LABEL_1', 'score': 0.56165004, 'index': 38, 'word': ',', 'start': 156, 'end': 157}, {'entity': 'LABEL_1', 'score': 0.551388, 'index': 39, 'word': 'and', 'start': 158, 'end': 161}, {'entity': 'LABEL_1', 'score': 0.5514617, 'index': 40, 'word': 'that', 'start': 162, 'end': 166}, {'entity': 'LABEL_1', 'score': 0.53406215, 'index': 41, 'word': 'the', 'start': 167, 'end': 170}, {'entity': 'LABEL_1', 'score': 0.6221385, 
'index': 42, 'word': 'face', 'start': 171, 'end': 175}, {'entity': 'LABEL_1', 'score': 0.6061021, 'index': 43, 'word': 'was', 'start': 176, 'end': 179}, {'entity': 'LABEL_1', 'score': 0.55775326, 'index': 44, 'word': 'created', 'start': 180, 'end': 187}, {'entity': 'LABEL_0', 'score': 0.5654199, 'index': 45, 'word': 'by', 'start': 188, 'end': 190}, {'entity': 'LABEL_0', 'score': 0.50058967, 'index': 46, 'word': 'alien', 'start': 191, 'end': 196}, {'entity': 'LABEL_0', 'score': 0.5248566, 'index': 47, 'word': '##s', 'start': 196, 'end': 197}, {'entity': 'LABEL_1', 'score': 0.5247892, 'index': 48, 'word': ',', 'start': 197, 'end': 198}, {'entity': 'LABEL_1', 'score': 0.5430314, 'index': 49, 'word': 'correct', 'start': 199, 'end': 206}, {'entity': 'LABEL_1', 'score': 0.61337465, 'index': 50, 'word': '?', 'start': 206, 'end': 207}, {'entity': 'LABEL_1', 'score': 0.64854234, 'index': 51, 'word': '\"', 'start': 207, 'end': 208}, {'entity': 'LABEL_1', 'score': 0.6051055, 'index': 52, 'word': 'No', 'start': 209, 'end': 211}, {'entity': 'LABEL_1', 'score': 0.58601683, 'index': 53, 'word': ',', 'start': 211, 'end': 212}, {'entity': 'LABEL_0', 'score': 0.6085843, 'index': 54, 'word': 'twenty', 'start': 213, 'end': 219}, {'entity': 'LABEL_0', 'score': 0.6648703, 'index': 55, 'word': 'five', 'start': 220, 'end': 224}, {'entity': 'LABEL_0', 'score': 0.650303, 'index': 56, 'word': 'years', 'start': 225, 'end': 230}, {'entity': 'LABEL_0', 'score': 0.6452194, 'index': 57, 'word': 'ago', 'start': 231, 'end': 234}, {'entity': 'LABEL_1', 'score': 0.50657856, 'index': 58, 'word': ',', 'start': 234, 'end': 235}, {'entity': 'LABEL_1', 'score': 0.50954217, 'index': 59, 'word': 'our', 'start': 236, 'end': 239}, {'entity': 'LABEL_1', 'score': 0.5194712, 'index': 60, 'word': 'Viking', 'start': 240, 'end': 246}, {'entity': 'LABEL_0', 'score': 0.5189833, 'index': 61, 'word': '1', 'start': 247, 'end': 248}, {'entity': 'LABEL_0', 'score': 0.5036279, 'index': 62, 'word': 'spacecraft', 'start': 
249, 'end': 259}, {'entity': 'LABEL_0', 'score': 0.5388934, 'index': 63, 'word': 'was', 'start': 260, 'end': 263}, {'entity': 'LABEL_0', 'score': 0.5811445, 'index': 64, 'word': 'ci', 'start': 264, 'end': 266}, {'entity': 'LABEL_0', 'score': 0.60810935, 'index': 65, 'word': '##rc', 'start': 266, 'end': 268}, {'entity': 'LABEL_0', 'score': 0.63988644, 'index': 66, 'word': '##ling', 'start': 268, 'end': 272}, {'entity': 'LABEL_0', 'score': 0.68350035, 'index': 67, 'word': 'the', 'start': 273, 'end': 276}, {'entity': 'LABEL_0', 'score': 0.5718535, 'index': 68, 'word': 'planet', 'start': 277, 'end': 283}, {'entity': 'LABEL_0', 'score': 0.5150864, 'index': 69, 'word': ',', 'start': 283, 'end': 284}, {'entity': 'LABEL_1', 'score': 0.5919597, 'index': 70, 'word': 'sna', 'start': 285, 'end': 288}, {'entity': 'LABEL_1', 'score': 0.5371689, 'index': 71, 'word': '##pping', 'start': 288, 'end': 293}, {'entity': 'LABEL_1', 'score': 0.5501098, 'index': 72, 'word': 'photos', 'start': 294, 'end': 300}, {'entity': 'LABEL_1', 'score': 0.58064604, 'index': 73, 'word': ',', 'start': 300, 'end': 301}, {'entity': 'LABEL_1', 'score': 0.602519, 'index': 74, 'word': 'when', 'start': 302, 'end': 306}, {'entity': 'LABEL_1', 'score': 0.53923917, 'index': 75, 'word': 'it', 'start': 307, 'end': 309}, {'entity': 'LABEL_1', 'score': 0.52948976, 'index': 76, 'word': 'spotted', 'start': 310, 'end': 317}, {'entity': 'LABEL_1', 'score': 0.53173506, 'index': 77, 'word': 'the', 'start': 318, 'end': 321}, {'entity': 'LABEL_1', 'score': 0.5621857, 'index': 78, 'word': 'sh', 'start': 322, 'end': 324}, {'entity': 'LABEL_1', 'score': 0.55711824, 'index': 79, 'word': '##adow', 'start': 324, 'end': 328}, {'entity': 'LABEL_1', 'score': 0.5415988, 'index': 80, 'word': '##y', 'start': 328, 'end': 329}, {'entity': 'LABEL_1', 'score': 0.50422186, 'index': 81, 'word': 'like', 'start': 330, 'end': 334}, {'entity': 'LABEL_1', 'score': 0.50749224, 'index': 82, 'word': '##ness', 'start': 334, 'end': 338}, {'entity': 
'LABEL_0', 'score': 0.50134593, 'index': 83, 'word': 'of', 'start': 339, 'end': 341}, {'entity': 'LABEL_1', 'score': 0.5051905, 'index': 84, 'word': 'a', 'start': 342, 'end': 343}, {'entity': 'LABEL_1', 'score': 0.5539653, 'index': 85, 'word': 'human', 'start': 344, 'end': 349}, {'entity': 'LABEL_1', 'score': 0.60916597, 'index': 86, 'word': 'face', 'start': 350, 'end': 354}, {'entity': 'LABEL_1', 'score': 0.6022859, 'index': 87, 'word': '.', 'start': 354, 'end': 355}, {'entity': 'LABEL_1', 'score': 0.61335, 'index': 88, 'word': 'Us', 'start': 356, 'end': 358}, {'entity': 'LABEL_1', 'score': 0.56481904, 'index': 89, 'word': 'scientists', 'start': 359, 'end': 369}, {'entity': 'LABEL_1', 'score': 0.643385, 'index': 90, 'word': 'figure', 'start': 370, 'end': 376}, {'entity': 'LABEL_1', 'score': 0.60697997, 'index': 91, 'word': '##d', 'start': 376, 'end': 377}, {'entity': 'LABEL_1', 'score': 0.558008, 'index': 92, 'word': 'out', 'start': 378, 'end': 381}, {'entity': 'LABEL_1', 'score': 0.5639139, 'index': 93, 'word': 'that', 'start': 382, 'end': 386}, {'entity': 'LABEL_1', 'score': 0.5370862, 'index': 94, 'word': 'it', 'start': 387, 'end': 389}, {'entity': 'LABEL_1', 'score': 0.58077925, 'index': 95, 'word': 'was', 'start': 390, 'end': 393}, {'entity': 'LABEL_1', 'score': 0.51595956, 'index': 96, 'word': 'just', 'start': 394, 'end': 398}, {'entity': 'LABEL_1', 'score': 0.51781327, 'index': 97, 'word': 'another', 'start': 399, 'end': 406}, {'entity': 'LABEL_0', 'score': 0.53742176, 'index': 98, 'word': 'Mart', 'start': 407, 'end': 411}, {'entity': 'LABEL_0', 'score': 0.54195774, 'index': 99, 'word': '##ian', 'start': 411, 'end': 414}, {'entity': 'LABEL_0', 'score': 0.5194467, 'index': 100, 'word': 'mesa', 'start': 415, 'end': 419}, {'entity': 'LABEL_1', 'score': 0.5056166, 'index': 101, 'word': ',', 'start': 419, 'end': 420}, {'entity': 'LABEL_0', 'score': 0.5286414, 'index': 102, 'word': 'common', 'start': 421, 'end': 427}, {'entity': 'LABEL_0', 'score': 0.59091985, 
'index': 103, 'word': 'around', 'start': 428, 'end': 434}, {'entity': 'LABEL_0', 'score': 0.5216974, 'index': 104, 'word': 'C', 'start': 435, 'end': 436}, {'entity': 'LABEL_0', 'score': 0.50349146, 'index': 105, 'word': '##yd', 'start': 436, 'end': 438}, {'entity': 'LABEL_0', 'score': 0.53423715, 'index': 106, 'word': '##onia', 'start': 438, 'end': 442}, {'entity': 'LABEL_1', 'score': 0.5149874, 'index': 107, 'word': ',', 'start': 442, 'end': 443}, {'entity': 'LABEL_1', 'score': 0.550047, 'index': 108, 'word': 'only', 'start': 444, 'end': 448}, {'entity': 'LABEL_1', 'score': 0.54703, 'index': 109, 'word': 'this', 'start': 449, 'end': 453}, {'entity': 'LABEL_1', 'score': 0.5274473, 'index': 110, 'word': 'one', 'start': 454, 'end': 457}, {'entity': 'LABEL_1', 'score': 0.60423684, 'index': 111, 'word': 'had', 'start': 458, 'end': 461}, {'entity': 'LABEL_1', 'score': 0.5206097, 'index': 112, 'word': 'sh', 'start': 462, 'end': 464}, {'entity': 'LABEL_1', 'score': 0.5060773, 'index': 113, 'word': '##adow', 'start': 464, 'end': 468}, {'entity': 'LABEL_1', 'score': 0.51237464, 'index': 114, 'word': '##s', 'start': 468, 'end': 469}, {'entity': 'LABEL_1', 'score': 0.5460441, 'index': 115, 'word': 'that', 'start': 470, 'end': 474}, {'entity': 'LABEL_1', 'score': 0.5336468, 'index': 116, 'word': 'made', 'start': 475, 'end': 479}, {'entity': 'LABEL_0', 'score': 0.5088002, 'index': 117, 'word': 'it', 'start': 480, 'end': 482}, {'entity': 'LABEL_0', 'score': 0.5061207, 'index': 118, 'word': 'look', 'start': 483, 'end': 487}, {'entity': 'LABEL_0', 'score': 0.6073445, 'index': 119, 'word': 'like', 'start': 488, 'end': 492}, {'entity': 'LABEL_0', 'score': 0.6060573, 'index': 120, 'word': 'an', 'start': 493, 'end': 495}, {'entity': 'LABEL_0', 'score': 0.5422901, 'index': 121, 'word': 'Egypt', 'start': 496, 'end': 501}, {'entity': 'LABEL_0', 'score': 0.5818128, 'index': 122, 'word': '##ion', 'start': 501, 'end': 504}, {'entity': 'LABEL_0', 'score': 0.57624054, 'index': 123, 'word': 
'Ph', 'start': 505, 'end': 507}, {'entity': 'LABEL_0', 'score': 0.55589443, 'index': 124, 'word': '##ara', 'start': 507, 'end': 510}, {'entity': 'LABEL_0', 'score': 0.5907527, 'index': 125, 'word': '##oh', 'start': 510, 'end': 512}, {'entity': 'LABEL_0', 'score': 0.67880136, 'index': 126, 'word': '.', 'start': 512, 'end': 513}, {'entity': 'LABEL_0', 'score': 0.55919296, 'index': 127, 'word': 'Very', 'start': 514, 'end': 518}, {'entity': 'LABEL_0', 'score': 0.6202078, 'index': 128, 'word': 'few', 'start': 519, 'end': 522}, {'entity': 'LABEL_0', 'score': 0.65795386, 'index': 129, 'word': 'days', 'start': 523, 'end': 527}, {'entity': 'LABEL_0', 'score': 0.67949957, 'index': 130, 'word': 'later', 'start': 528, 'end': 533}, {'entity': 'LABEL_1', 'score': 0.5850206, 'index': 131, 'word': ',', 'start': 533, 'end': 534}, {'entity': 'LABEL_1', 'score': 0.6831487, 'index': 132, 'word': 'we', 'start': 535, 'end': 537}, {'entity': 'LABEL_1', 'score': 0.554138, 'index': 133, 'word': 'revealed', 'start': 538, 'end': 546}, {'entity': 'LABEL_0', 'score': 0.50729936, 'index': 134, 'word': 'the', 'start': 547, 'end': 550}, {'entity': 'LABEL_1', 'score': 0.5196401, 'index': 135, 'word': 'image', 'start': 551, 'end': 556}, {'entity': 'LABEL_1', 'score': 0.6263674, 'index': 136, 'word': 'for', 'start': 557, 'end': 560}, {'entity': 'LABEL_1', 'score': 0.6722497, 'index': 137, 'word': 'all', 'start': 561, 'end': 564}, {'entity': 'LABEL_1', 'score': 0.6646968, 'index': 138, 'word': 'to', 'start': 565, 'end': 567}, {'entity': 'LABEL_1', 'score': 0.55594933, 'index': 139, 'word': 'see', 'start': 568, 'end': 571}, {'entity': 'LABEL_1', 'score': 0.6437286, 'index': 140, 'word': ',', 'start': 571, 'end': 572}, {'entity': 'LABEL_1', 'score': 0.6288408, 'index': 141, 'word': 'and', 'start': 573, 'end': 576}, {'entity': 'LABEL_1', 'score': 0.67429084, 'index': 142, 'word': 'we', 'start': 577, 'end': 579}, {'entity': 'LABEL_1', 'score': 0.5014696, 'index': 143, 'word': 'made', 'start': 580, 'end': 
584}, {'entity': 'LABEL_0', 'score': 0.54451215, 'index': 144, 'word': 'sure', 'start': 585, 'end': 589}, {'entity': 'LABEL_0', 'score': 0.5620309, 'index': 145, 'word': 'to', 'start': 590, 'end': 592}, {'entity': 'LABEL_0', 'score': 0.5541243, 'index': 146, 'word': 'note', 'start': 593, 'end': 597}, {'entity': 'LABEL_1', 'score': 0.5254825, 'index': 147, 'word': 'that', 'start': 598, 'end': 602}, {'entity': 'LABEL_1', 'score': 0.5633732, 'index': 148, 'word': 'it', 'start': 603, 'end': 605}, {'entity': 'LABEL_1', 'score': 0.5730712, 'index': 149, 'word': 'was', 'start': 606, 'end': 609}, {'entity': 'LABEL_0', 'score': 0.5550324, 'index': 150, 'word': 'a', 'start': 610, 'end': 611}, {'entity': 'LABEL_0', 'score': 0.5353202, 'index': 151, 'word': 'huge', 'start': 612, 'end': 616}, {'entity': 'LABEL_0', 'score': 0.5435573, 'index': 152, 'word': 'rock', 'start': 617, 'end': 621}, {'entity': 'LABEL_0', 'score': 0.54164654, 'index': 153, 'word': 'formation', 'start': 622, 'end': 631}, {'entity': 'LABEL_0', 'score': 0.5117593, 'index': 154, 'word': 'that', 'start': 632, 'end': 636}, {'entity': 'LABEL_1', 'score': 0.51742077, 'index': 155, 'word': 'just', 'start': 637, 'end': 641}, {'entity': 'LABEL_1', 'score': 0.5446302, 'index': 156, 'word': 'res', 'start': 642, 'end': 645}, {'entity': 'LABEL_1', 'score': 0.50045645, 'index': 157, 'word': '##emble', 'start': 645, 'end': 650}, {'entity': 'LABEL_1', 'score': 0.5215423, 'index': 158, 'word': '##d', 'start': 650, 'end': 651}, {'entity': 'LABEL_0', 'score': 0.55436605, 'index': 159, 'word': 'a', 'start': 652, 'end': 653}, {'entity': 'LABEL_1', 'score': 0.5265072, 'index': 160, 'word': 'human', 'start': 654, 'end': 659}, {'entity': 'LABEL_1', 'score': 0.55062467, 'index': 161, 'word': 'head', 'start': 660, 'end': 664}, {'entity': 'LABEL_1', 'score': 0.5384184, 'index': 162, 'word': 'and', 'start': 665, 'end': 668}, {'entity': 'LABEL_1', 'score': 0.5848135, 'index': 163, 'word': 'face', 'start': 669, 'end': 673}, {'entity': 
'LABEL_1', 'score': 0.57094294, 'index': 164, 'word': ',', 'start': 673, 'end': 674}, {'entity': 'LABEL_1', 'score': 0.5754807, 'index': 165, 'word': 'but', 'start': 675, 'end': 678}, {'entity': 'LABEL_1', 'score': 0.5758484, 'index': 166, 'word': 'all', 'start': 679, 'end': 682}, {'entity': 'LABEL_1', 'score': 0.54321927, 'index': 167, 'word': 'of', 'start': 683, 'end': 685}, {'entity': 'LABEL_1', 'score': 0.5251982, 'index': 168, 'word': 'it', 'start': 686, 'end': 688}, {'entity': 'LABEL_1', 'score': 0.62576276, 'index': 169, 'word': 'was', 'start': 689, 'end': 692}, {'entity': 'LABEL_1', 'score': 0.5525769, 'index': 170, 'word': 'formed', 'start': 693, 'end': 699}, {'entity': 'LABEL_1', 'score': 0.5072356, 'index': 171, 'word': 'by', 'start': 700, 'end': 702}, {'entity': 'LABEL_1', 'score': 0.54959744, 'index': 172, 'word': 'sh', 'start': 703, 'end': 705}, {'entity': 'LABEL_1', 'score': 0.5374098, 'index': 173, 'word': '##adow', 'start': 705, 'end': 709}, {'entity': 'LABEL_1', 'score': 0.55133754, 'index': 174, 'word': '##s', 'start': 709, 'end': 710}, {'entity': 'LABEL_0', 'score': 0.692533, 'index': 175, 'word': '.', 'start': 710, 'end': 711}, {'entity': 'LABEL_1', 'score': 0.68988186, 'index': 176, 'word': 'We', 'start': 712, 'end': 714}, {'entity': 'LABEL_1', 'score': 0.58056563, 'index': 177, 'word': 'only', 'start': 715, 'end': 719}, {'entity': 'LABEL_1', 'score': 0.52151, 'index': 178, 'word': 'announced', 'start': 720, 'end': 729}, {'entity': 'LABEL_0', 'score': 0.5221571, 'index': 179, 'word': 'it', 'start': 730, 'end': 732}, {'entity': 'LABEL_1', 'score': 0.5608035, 'index': 180, 'word': 'because', 'start': 733, 'end': 740}, {'entity': 'LABEL_1', 'score': 0.6509734, 'index': 181, 'word': 'we', 'start': 741, 'end': 743}, {'entity': 'LABEL_1', 'score': 0.58938694, 'index': 182, 'word': 'thought', 'start': 744, 'end': 751}, {'entity': 'LABEL_1', 'score': 0.5525905, 'index': 183, 'word': 'it', 'start': 752, 'end': 754}, {'entity': 'LABEL_1', 'score': 
0.5674345, 'index': 184, 'word': 'would', 'start': 755, 'end': 760}, {'entity': 'LABEL_1', 'score': 0.5639775, 'index': 185, 'word': 'be', 'start': 761, 'end': 763}, {'entity': 'LABEL_1', 'score': 0.501266, 'index': 186, 'word': 'a', 'start': 764, 'end': 765}, {'entity': 'LABEL_0', 'score': 0.50765914, 'index': 187, 'word': 'good', 'start': 766, 'end': 770}, {'entity': 'LABEL_0', 'score': 0.52330655, 'index': 188, 'word': 'way', 'start': 771, 'end': 774}, {'entity': 'LABEL_1', 'score': 0.57470995, 'index': 189, 'word': 'to', 'start': 775, 'end': 777}, {'entity': 'LABEL_1', 'score': 0.61295885, 'index': 190, 'word': 'engage', 'start': 778, 'end': 784}, {'entity': 'LABEL_0', 'score': 0.5334338, 'index': 191, 'word': 'the', 'start': 785, 'end': 788}, {'entity': 'LABEL_0', 'score': 0.51933604, 'index': 192, 'word': 'public', 'start': 789, 'end': 795}, {'entity': 'LABEL_1', 'score': 0.5949142, 'index': 193, 'word': 'with', 'start': 796, 'end': 800}, {'entity': 'LABEL_1', 'score': 0.6003629, 'index': 194, 'word': 'NASA', 'start': 801, 'end': 805}, {'entity': 'LABEL_1', 'score': 0.5744194, 'index': 195, 'word': \"'\", 'start': 805, 'end': 806}, {'entity': 'LABEL_1', 'score': 0.57181925, 'index': 196, 'word': 's', 'start': 806, 'end': 807}, {'entity': 'LABEL_1', 'score': 0.53557616, 'index': 197, 'word': 'findings', 'start': 808, 'end': 816}, {'entity': 'LABEL_1', 'score': 0.6095189, 'index': 198, 'word': ',', 'start': 816, 'end': 817}, {'entity': 'LABEL_1', 'score': 0.6437797, 'index': 199, 'word': 'and', 'start': 818, 'end': 821}, {'entity': 'LABEL_1', 'score': 0.55779886, 'index': 200, 'word': 'at', 'start': 822, 'end': 824}, {'entity': 'LABEL_1', 'score': 0.52227074, 'index': 201, 'word': '##rra', 'start': 824, 'end': 827}, {'entity': 'LABEL_1', 'score': 0.5302549, 'index': 202, 'word': '##ct', 'start': 827, 'end': 829}, {'entity': 'LABEL_0', 'score': 0.50743705, 'index': 203, 'word': 'attention', 'start': 830, 'end': 839}, {'entity': 'LABEL_0', 'score': 0.54697585, 
'index': 204, 'word': 'to', 'start': 840, 'end': 842}, {'entity': 'LABEL_0', 'score': 0.51355886, 'index': 205, 'word': 'Mars', 'start': 843, 'end': 847}, {'entity': 'LABEL_1', 'score': 0.5464499, 'index': 206, 'word': '-', 'start': 847, 'end': 848}, {'entity': 'LABEL_1', 'score': 0.59227043, 'index': 207, 'word': '-', 'start': 848, 'end': 849}, {'entity': 'LABEL_1', 'score': 0.594104, 'index': 208, 'word': 'and', 'start': 850, 'end': 853}, {'entity': 'LABEL_1', 'score': 0.5491132, 'index': 209, 'word': 'it', 'start': 854, 'end': 856}, {'entity': 'LABEL_1', 'score': 0.5387794, 'index': 210, 'word': 'did', 'start': 857, 'end': 860}, {'entity': 'LABEL_0', 'score': 0.6830391, 'index': 211, 'word': '.', 'start': 860, 'end': 861}, {'entity': 'LABEL_1', 'score': 0.5619964, 'index': 212, 'word': 'The', 'start': 863, 'end': 866}, {'entity': 'LABEL_1', 'score': 0.6061772, 'index': 213, 'word': 'face', 'start': 867, 'end': 871}, {'entity': 'LABEL_1', 'score': 0.5403782, 'index': 214, 'word': 'on', 'start': 872, 'end': 874}, {'entity': 'LABEL_1', 'score': 0.553967, 'index': 215, 'word': 'Mars', 'start': 875, 'end': 879}, {'entity': 'LABEL_1', 'score': 0.5517386, 'index': 216, 'word': 'soon', 'start': 880, 'end': 884}, {'entity': 'LABEL_1', 'score': 0.5017604, 'index': 217, 'word': 'became', 'start': 885, 'end': 891}, {'entity': 'LABEL_0', 'score': 0.6197886, 'index': 218, 'word': 'a', 'start': 892, 'end': 893}, {'entity': 'LABEL_0', 'score': 0.5807936, 'index': 219, 'word': 'pop', 'start': 894, 'end': 897}, {'entity': 'LABEL_0', 'score': 0.60519373, 'index': 220, 'word': 'i', 'start': 898, 'end': 899}, {'entity': 'LABEL_0', 'score': 0.63208836, 'index': 221, 'word': '##con', 'start': 899, 'end': 902}, {'entity': 'LABEL_1', 'score': 0.69506586, 'index': 222, 'word': ';', 'start': 902, 'end': 903}, {'entity': 'LABEL_1', 'score': 0.5886576, 'index': 223, 'word': 'shot', 'start': 904, 'end': 908}, {'entity': 'LABEL_1', 'score': 0.5002726, 'index': 224, 'word': 'in', 'start': 909, 
'end': 911}, {'entity': 'LABEL_1', 'score': 0.557791, 'index': 225, 'word': 'movies', 'start': 912, 'end': 918}, {'entity': 'LABEL_1', 'score': 0.6290412, 'index': 226, 'word': ',', 'start': 918, 'end': 919}, {'entity': 'LABEL_1', 'score': 0.52317363, 'index': 227, 'word': 'appeared', 'start': 920, 'end': 928}, {'entity': 'LABEL_0', 'score': 0.5399968, 'index': 228, 'word': 'in', 'start': 929, 'end': 931}, {'entity': 'LABEL_0', 'score': 0.5414876, 'index': 229, 'word': 'books', 'start': 932, 'end': 937}, {'entity': 'LABEL_0', 'score': 0.5429459, 'index': 230, 'word': ',', 'start': 937, 'end': 938}, {'entity': 'LABEL_0', 'score': 0.5682568, 'index': 231, 'word': 'magazines', 'start': 939, 'end': 948}, {'entity': 'LABEL_0', 'score': 0.5222259, 'index': 232, 'word': ',', 'start': 948, 'end': 949}, {'entity': 'LABEL_0', 'score': 0.5639273, 'index': 233, 'word': 'radio', 'start': 950, 'end': 955}, {'entity': 'LABEL_0', 'score': 0.56908894, 'index': 234, 'word': 'talk', 'start': 956, 'end': 960}, {'entity': 'LABEL_0', 'score': 0.5995537, 'index': 235, 'word': 'shows', 'start': 961, 'end': 966}, {'entity': 'LABEL_1', 'score': 0.55398846, 'index': 236, 'word': ',', 'start': 966, 'end': 967}, {'entity': 'LABEL_1', 'score': 0.53322935, 'index': 237, 'word': 'and', 'start': 968, 'end': 971}, {'entity': 'LABEL_1', 'score': 0.5473446, 'index': 238, 'word': 'hau', 'start': 972, 'end': 975}, {'entity': 'LABEL_1', 'score': 0.5041182, 'index': 239, 'word': '##nted', 'start': 975, 'end': 979}, {'entity': 'LABEL_0', 'score': 0.55119586, 'index': 240, 'word': 'gr', 'start': 980, 'end': 982}, {'entity': 'LABEL_0', 'score': 0.56132877, 'index': 241, 'word': '##oce', 'start': 982, 'end': 985}, {'entity': 'LABEL_0', 'score': 0.55828726, 'index': 242, 'word': '##ry', 'start': 985, 'end': 987}, {'entity': 'LABEL_0', 'score': 0.561964, 'index': 243, 'word': 'store', 'start': 988, 'end': 993}, {'entity': 'LABEL_0', 'score': 0.54090506, 'index': 244, 'word': 'check', 'start': 994, 'end': 999}, 
{'entity': 'LABEL_0', 'score': 0.54825944, 'index': 245, 'word': '##out', 'start': 999, 'end': 1002}, {'entity': 'LABEL_0', 'score': 0.55701, 'index': 246, 'word': 'lines', 'start': 1003, 'end': 1008}, {'entity': 'LABEL_0', 'score': 0.5457833, 'index': 247, 'word': 'for', 'start': 1009, 'end': 1012}, {'entity': 'LABEL_0', 'score': 0.6821632, 'index': 248, 'word': '25', 'start': 1013, 'end': 1015}, {'entity': 'LABEL_0', 'score': 0.660666, 'index': 249, 'word': 'years', 'start': 1016, 'end': 1021}, {'entity': 'LABEL_0', 'score': 0.68063366, 'index': 250, 'word': '.', 'start': 1021, 'end': 1022}, {'entity': 'LABEL_1', 'score': 0.5878949, 'index': 251, 'word': 'Some', 'start': 1023, 'end': 1027}, {'entity': 'LABEL_1', 'score': 0.56123006, 'index': 252, 'word': 'people', 'start': 1028, 'end': 1034}, {'entity': 'LABEL_1', 'score': 0.6098995, 'index': 253, 'word': 'thought', 'start': 1035, 'end': 1042}, {'entity': 'LABEL_0', 'score': 0.5661758, 'index': 254, 'word': 'the', 'start': 1043, 'end': 1046}, {'entity': 'LABEL_0', 'score': 0.5386031, 'index': 255, 'word': 'natural', 'start': 1047, 'end': 1054}, {'entity': 'LABEL_0', 'score': 0.52325475, 'index': 256, 'word': 'land', 'start': 1055, 'end': 1059}, {'entity': 'LABEL_0', 'score': 0.5261488, 'index': 257, 'word': '##form', 'start': 1059, 'end': 1063}, {'entity': 'LABEL_1', 'score': 0.5174967, 'index': 258, 'word': 'was', 'start': 1064, 'end': 1067}, {'entity': 'LABEL_0', 'score': 0.5168788, 'index': 259, 'word': 'evidence', 'start': 1068, 'end': 1076}, {'entity': 'LABEL_0', 'score': 0.6249421, 'index': 260, 'word': 'of', 'start': 1077, 'end': 1079}, {'entity': 'LABEL_0', 'score': 0.70964944, 'index': 261, 'word': 'life', 'start': 1080, 'end': 1084}, {'entity': 'LABEL_0', 'score': 0.685455, 'index': 262, 'word': 'on', 'start': 1085, 'end': 1087}, {'entity': 'LABEL_0', 'score': 0.5668801, 'index': 263, 'word': 'Mars', 'start': 1088, 'end': 1092}, {'entity': 'LABEL_0', 'score': 0.5259334, 'index': 264, 'word': ',', 
'start': 1092, 'end': 1093}, {'entity': 'LABEL_1', 'score': 0.60075504, 'index': 265, 'word': 'and', 'start': 1094, 'end': 1097}, {'entity': 'LABEL_1', 'score': 0.6089564, 'index': 266, 'word': 'that', 'start': 1098, 'end': 1102}, {'entity': 'LABEL_1', 'score': 0.5566934, 'index': 267, 'word': 'us', 'start': 1103, 'end': 1105}, {'entity': 'LABEL_1', 'score': 0.565986, 'index': 268, 'word': 'scientists', 'start': 1106, 'end': 1116}, {'entity': 'LABEL_1', 'score': 0.63284963, 'index': 269, 'word': 'wanted', 'start': 1117, 'end': 1123}, {'entity': 'LABEL_1', 'score': 0.5852543, 'index': 270, 'word': 'to', 'start': 1124, 'end': 1126}, {'entity': 'LABEL_1', 'score': 0.58516186, 'index': 271, 'word': 'hide', 'start': 1127, 'end': 1131}, {'entity': 'LABEL_1', 'score': 0.5035104, 'index': 272, 'word': 'it', 'start': 1132, 'end': 1134}, {'entity': 'LABEL_1', 'score': 0.55418307, 'index': 273, 'word': ',', 'start': 1134, 'end': 1135}, {'entity': 'LABEL_1', 'score': 0.632634, 'index': 274, 'word': 'but', 'start': 1136, 'end': 1139}, {'entity': 'LABEL_1', 'score': 0.64646935, 'index': 275, 'word': 'really', 'start': 1140, 'end': 1146}, {'entity': 'LABEL_1', 'score': 0.60271597, 'index': 276, 'word': ',', 'start': 1146, 'end': 1147}, {'entity': 'LABEL_1', 'score': 0.55116373, 'index': 277, 'word': 'the', 'start': 1148, 'end': 1151}, {'entity': 'LABEL_1', 'score': 0.5534294, 'index': 278, 'word': 'defender', 'start': 1152, 'end': 1160}, {'entity': 'LABEL_0', 'score': 0.51253384, 'index': 279, 'word': '##s', 'start': 1160, 'end': 1161}, {'entity': 'LABEL_0', 'score': 0.51277995, 'index': 280, 'word': 'of', 'start': 1162, 'end': 1164}, {'entity': 'LABEL_0', 'score': 0.50931233, 'index': 281, 'word': 'the', 'start': 1165, 'end': 1168}, {'entity': 'LABEL_1', 'score': 0.5213263, 'index': 282, 'word': 'NASA', 'start': 1169, 'end': 1173}, {'entity': 'LABEL_1', 'score': 0.52774245, 'index': 283, 'word': 'budget', 'start': 1174, 'end': 1180}, {'entity': 'LABEL_1', 'score': 0.6431934, 
'index': 284, 'word': 'wish', 'start': 1181, 'end': 1185}, {'entity': 'LABEL_0', 'score': 0.6439814, 'index': 285, 'word': 'there', 'start': 1186, 'end': 1191}, {'entity': 'LABEL_0', 'score': 0.6172206, 'index': 286, 'word': 'was', 'start': 1192, 'end': 1195}, {'entity': 'LABEL_0', 'score': 0.6588487, 'index': 287, 'word': 'ancient', 'start': 1196, 'end': 1203}, {'entity': 'LABEL_0', 'score': 0.66718197, 'index': 288, 'word': 'civili', 'start': 1204, 'end': 1210}, {'entity': 'LABEL_0', 'score': 0.67449886, 'index': 289, 'word': '##zation', 'start': 1210, 'end': 1216}, {'entity': 'LABEL_0', 'score': 0.6369945, 'index': 290, 'word': 'on', 'start': 1217, 'end': 1219}, {'entity': 'LABEL_0', 'score': 0.54807943, 'index': 291, 'word': 'Mars', 'start': 1220, 'end': 1224}, {'entity': 'LABEL_0', 'score': 0.6829598, 'index': 292, 'word': '.', 'start': 1224, 'end': 1225}, {'entity': 'LABEL_1', 'score': 0.69923604, 'index': 293, 'word': 'We', 'start': 1226, 'end': 1228}, {'entity': 'LABEL_1', 'score': 0.5304869, 'index': 294, 'word': 'decided', 'start': 1229, 'end': 1236}, {'entity': 'LABEL_0', 'score': 0.50105697, 'index': 295, 'word': 'to', 'start': 1237, 'end': 1239}, {'entity': 'LABEL_1', 'score': 0.52295804, 'index': 296, 'word': 'take', 'start': 1240, 'end': 1244}, {'entity': 'LABEL_0', 'score': 0.50881416, 'index': 297, 'word': 'another', 'start': 1245, 'end': 1252}, {'entity': 'LABEL_1', 'score': 0.51416457, 'index': 298, 'word': 'shot', 'start': 1253, 'end': 1257}, {'entity': 'LABEL_1', 'score': 0.5382968, 'index': 299, 'word': 'just', 'start': 1258, 'end': 1262}, {'entity': 'LABEL_1', 'score': 0.56296045, 'index': 300, 'word': 'to', 'start': 1263, 'end': 1265}, {'entity': 'LABEL_1', 'score': 0.59076697, 'index': 301, 'word': 'make', 'start': 1266, 'end': 1270}, {'entity': 'LABEL_1', 'score': 0.5739148, 'index': 302, 'word': 'sure', 'start': 1271, 'end': 1275}, {'entity': 'LABEL_1', 'score': 0.6504669, 'index': 303, 'word': 'we', 'start': 1276, 'end': 1278}, 
{'entity': 'LABEL_1', 'score': 0.6707218, 'index': 304, 'word': 'were', 'start': 1279, 'end': 1283}, {'entity': 'LABEL_1', 'score': 0.5913361, 'index': 305, 'word': '##n', 'start': 1283, 'end': 1284}, {'entity': 'LABEL_1', 'score': 0.62763625, 'index': 306, 'word': \"'\", 'start': 1284, 'end': 1285}, {'entity': 'LABEL_1', 'score': 0.6344838, 'index': 307, 'word': 't', 'start': 1285, 'end': 1286}, {'entity': 'LABEL_1', 'score': 0.65749675, 'index': 308, 'word': 'wrong', 'start': 1287, 'end': 1292}, {'entity': 'LABEL_1', 'score': 0.59669024, 'index': 309, 'word': ',', 'start': 1292, 'end': 1293}, {'entity': 'LABEL_1', 'score': 0.5044974, 'index': 310, 'word': 'on', 'start': 1294, 'end': 1296}, {'entity': 'LABEL_0', 'score': 0.5337205, 'index': 311, 'word': 'April', 'start': 1297, 'end': 1302}, {'entity': 'LABEL_0', 'score': 0.57182956, 'index': 312, 'word': '5', 'start': 1303, 'end': 1304}, {'entity': 'LABEL_0', 'score': 0.5762897, 'index': 313, 'word': ',', 'start': 1304, 'end': 1305}, {'entity': 'LABEL_1', 'score': 0.5169943, 'index': 314, 'word': '1998', 'start': 1306, 'end': 1310}, {'entity': 'LABEL_1', 'score': 0.5751032, 'index': 315, 'word': '.', 'start': 1310, 'end': 1311}, {'entity': 'LABEL_1', 'score': 0.61884916, 'index': 316, 'word': 'Michael', 'start': 1312, 'end': 1319}, {'entity': 'LABEL_1', 'score': 0.6418079, 'index': 317, 'word': 'Mali', 'start': 1320, 'end': 1324}, {'entity': 'LABEL_1', 'score': 0.5908292, 'index': 318, 'word': '##n', 'start': 1324, 'end': 1325}, {'entity': 'LABEL_1', 'score': 0.6004406, 'index': 319, 'word': 'and', 'start': 1326, 'end': 1329}, {'entity': 'LABEL_1', 'score': 0.5574665, 'index': 320, 'word': 'his', 'start': 1330, 'end': 1333}, {'entity': 'LABEL_1', 'score': 0.57928157, 'index': 321, 'word': 'Mars', 'start': 1334, 'end': 1338}, {'entity': 'LABEL_1', 'score': 0.54800403, 'index': 322, 'word': 'Or', 'start': 1339, 'end': 1341}, {'entity': 'LABEL_1', 'score': 0.52113616, 'index': 323, 'word': '##biter', 'start': 1341, 
'end': 1346}, {'entity': 'LABEL_1', 'score': 0.5951806, 'index': 324, 'word': 'camera', 'start': 1347, 'end': 1353}, {'entity': 'LABEL_1', 'score': 0.6002175, 'index': 325, 'word': 'team', 'start': 1354, 'end': 1358}, {'entity': 'LABEL_0', 'score': 0.50268763, 'index': 326, 'word': 'took', 'start': 1359, 'end': 1363}, {'entity': 'LABEL_0', 'score': 0.5686814, 'index': 327, 'word': 'a', 'start': 1364, 'end': 1365}, {'entity': 'LABEL_0', 'score': 0.567501, 'index': 328, 'word': 'picture', 'start': 1366, 'end': 1373}, {'entity': 'LABEL_0', 'score': 0.5302457, 'index': 329, 'word': 'that', 'start': 1374, 'end': 1378}, {'entity': 'LABEL_1', 'score': 0.5295077, 'index': 330, 'word': 'was', 'start': 1379, 'end': 1382}, {'entity': 'LABEL_0', 'score': 0.55978584, 'index': 331, 'word': 'ten', 'start': 1383, 'end': 1386}, {'entity': 'LABEL_0', 'score': 0.56129557, 'index': 332, 'word': 'times', 'start': 1387, 'end': 1392}, {'entity': 'LABEL_0', 'score': 0.5239482, 'index': 333, 'word': 'sharp', 'start': 1393, 'end': 1398}, {'entity': 'LABEL_0', 'score': 0.60185, 'index': 334, 'word': '##er', 'start': 1398, 'end': 1400}, {'entity': 'LABEL_0', 'score': 0.619771, 'index': 335, 'word': 'than', 'start': 1401, 'end': 1405}, {'entity': 'LABEL_0', 'score': 0.56753343, 'index': 336, 'word': 'the', 'start': 1406, 'end': 1409}, {'entity': 'LABEL_0', 'score': 0.5345476, 'index': 337, 'word': 'original', 'start': 1410, 'end': 1418}, {'entity': 'LABEL_0', 'score': 0.53111845, 'index': 338, 'word': 'Viking', 'start': 1419, 'end': 1425}, {'entity': 'LABEL_0', 'score': 0.51020545, 'index': 339, 'word': 'photos', 'start': 1426, 'end': 1432}, {'entity': 'LABEL_1', 'score': 0.5032632, 'index': 340, 'word': ',', 'start': 1432, 'end': 1433}, {'entity': 'LABEL_0', 'score': 0.5354122, 'index': 341, 'word': 'reveal', 'start': 1434, 'end': 1440}, {'entity': 'LABEL_0', 'score': 0.54571337, 'index': 342, 'word': '##ing', 'start': 1440, 'end': 1443}, {'entity': 'LABEL_0', 'score': 0.6040675, 'index': 
343, 'word': 'a', 'start': 1444, 'end': 1445}, {'entity': 'LABEL_0', 'score': 0.5651683, 'index': 344, 'word': 'natural', 'start': 1446, 'end': 1453}, {'entity': 'LABEL_0', 'score': 0.54561985, 'index': 345, 'word': 'land', 'start': 1454, 'end': 1458}, {'entity': 'LABEL_0', 'score': 0.5547739, 'index': 346, 'word': '##form', 'start': 1458, 'end': 1462}, {'entity': 'LABEL_0', 'score': 0.54526246, 'index': 347, 'word': ',', 'start': 1462, 'end': 1463}, {'entity': 'LABEL_0', 'score': 0.5390101, 'index': 348, 'word': 'which', 'start': 1464, 'end': 1469}, {'entity': 'LABEL_0', 'score': 0.5140612, 'index': 349, 'word': 'meant', 'start': 1470, 'end': 1475}, {'entity': 'LABEL_0', 'score': 0.5335286, 'index': 350, 'word': 'no', 'start': 1476, 'end': 1478}, {'entity': 'LABEL_1', 'score': 0.53740835, 'index': 351, 'word': 'alien', 'start': 1479, 'end': 1484}, {'entity': 'LABEL_0', 'score': 0.5383639, 'index': 352, 'word': 'monument', 'start': 1485, 'end': 1493}, {'entity': 'LABEL_0', 'score': 0.678678, 'index': 353, 'word': '.', 'start': 1493, 'end': 1494}, {'entity': 'LABEL_0', 'score': 0.5126649, 'index': 354, 'word': '\"', 'start': 1495, 'end': 1496}, {'entity': 'LABEL_1', 'score': 0.5785337, 'index': 355, 'word': 'But', 'start': 1496, 'end': 1499}, {'entity': 'LABEL_0', 'score': 0.51326215, 'index': 356, 'word': 'that', 'start': 1500, 'end': 1504}, {'entity': 'LABEL_0', 'score': 0.52528965, 'index': 357, 'word': 'picture', 'start': 1505, 'end': 1512}, {'entity': 'LABEL_1', 'score': 0.6512115, 'index': 358, 'word': 'wasn', 'start': 1513, 'end': 1517}, {'entity': 'LABEL_1', 'score': 0.60384643, 'index': 359, 'word': \"'\", 'start': 1517, 'end': 1518}, {'entity': 'LABEL_1', 'score': 0.59462476, 'index': 360, 'word': 't', 'start': 1518, 'end': 1519}, {'entity': 'LABEL_1', 'score': 0.63325346, 'index': 361, 'word': 'very', 'start': 1520, 'end': 1524}, {'entity': 'LABEL_1', 'score': 0.6392778, 'index': 362, 'word': 'clear', 'start': 1525, 'end': 1530}, {'entity': 'LABEL_1', 
'score': 0.6274112, 'index': 363, 'word': 'at', 'start': 1531, 'end': 1533}, {'entity': 'LABEL_1', 'score': 0.63470787, 'index': 364, 'word': 'all', 'start': 1534, 'end': 1537}, {'entity': 'LABEL_1', 'score': 0.5834824, 'index': 365, 'word': ',', 'start': 1537, 'end': 1538}, {'entity': 'LABEL_1', 'score': 0.6205755, 'index': 366, 'word': 'which', 'start': 1539, 'end': 1544}, {'entity': 'LABEL_1', 'score': 0.6719913, 'index': 367, 'word': 'could', 'start': 1545, 'end': 1550}, {'entity': 'LABEL_1', 'score': 0.62575376, 'index': 368, 'word': 'mean', 'start': 1551, 'end': 1555}, {'entity': 'LABEL_1', 'score': 0.5630169, 'index': 369, 'word': 'alien', 'start': 1556, 'end': 1561}, {'entity': 'LABEL_1', 'score': 0.51273495, 'index': 370, 'word': 'marking', 'start': 1562, 'end': 1569}, {'entity': 'LABEL_1', 'score': 0.5028247, 'index': 371, 'word': '##s', 'start': 1569, 'end': 1570}, {'entity': 'LABEL_1', 'score': 0.6543652, 'index': 372, 'word': 'were', 'start': 1571, 'end': 1575}, {'entity': 'LABEL_1', 'score': 0.5995173, 'index': 373, 'word': 'hidden', 'start': 1576, 'end': 1582}, {'entity': 'LABEL_1', 'score': 0.54646325, 'index': 374, 'word': 'by', 'start': 1583, 'end': 1585}, {'entity': 'LABEL_0', 'score': 0.53214496, 'index': 375, 'word': 'ha', 'start': 1586, 'end': 1588}, {'entity': 'LABEL_0', 'score': 0.5461479, 'index': 376, 'word': '##ze', 'start': 1588, 'end': 1590}, {'entity': 'LABEL_1', 'score': 0.50528497, 'index': 377, 'word': '\"', 'start': 1590, 'end': 1591}, {'entity': 'LABEL_1', 'score': 0.55964, 'index': 378, 'word': 'Well', 'start': 1592, 'end': 1596}, {'entity': 'LABEL_1', 'score': 0.50823045, 'index': 379, 'word': 'no', 'start': 1597, 'end': 1599}, {'entity': 'LABEL_1', 'score': 0.54237086, 'index': 380, 'word': ',', 'start': 1599, 'end': 1600}, {'entity': 'LABEL_1', 'score': 0.54179376, 'index': 381, 'word': 'ye', 'start': 1601, 'end': 1603}, {'entity': 'LABEL_1', 'score': 0.5154993, 'index': 382, 'word': '##s', 'start': 1603, 'end': 1604}, 
{'entity': 'LABEL_0', 'score': 0.54574096, 'index': 383, 'word': 'that', 'start': 1605, 'end': 1609}, {'entity': 'LABEL_0', 'score': 0.5339636, 'index': 384, 'word': 'rum', 'start': 1610, 'end': 1613}, {'entity': 'LABEL_0', 'score': 0.60298085, 'index': 385, 'word': '##or', 'start': 1613, 'end': 1615}, {'entity': 'LABEL_0', 'score': 0.5779869, 'index': 386, 'word': 'started', 'start': 1616, 'end': 1623}, {'entity': 'LABEL_1', 'score': 0.57263666, 'index': 387, 'word': ',', 'start': 1623, 'end': 1624}, {'entity': 'LABEL_1', 'score': 0.58315307, 'index': 388, 'word': 'but', 'start': 1625, 'end': 1628}, {'entity': 'LABEL_1', 'score': 0.5169765, 'index': 389, 'word': 'to', 'start': 1629, 'end': 1631}, {'entity': 'LABEL_1', 'score': 0.53323334, 'index': 390, 'word': 'prove', 'start': 1632, 'end': 1637}, {'entity': 'LABEL_1', 'score': 0.5946281, 'index': 391, 'word': 'them', 'start': 1638, 'end': 1642}, {'entity': 'LABEL_1', 'score': 0.6326935, 'index': 392, 'word': 'wrong', 'start': 1643, 'end': 1648}, {'entity': 'LABEL_0', 'score': 0.51224774, 'index': 393, 'word': 'on', 'start': 1649, 'end': 1651}, {'entity': 'LABEL_0', 'score': 0.50525755, 'index': 394, 'word': 'April', 'start': 1652, 'end': 1657}, {'entity': 'LABEL_0', 'score': 0.53439724, 'index': 395, 'word': '8', 'start': 1658, 'end': 1659}, {'entity': 'LABEL_0', 'score': 0.5542627, 'index': 396, 'word': ',', 'start': 1659, 'end': 1660}, {'entity': 'LABEL_1', 'score': 0.52382576, 'index': 397, 'word': '2001', 'start': 1661, 'end': 1665}, {'entity': 'LABEL_1', 'score': 0.67537016, 'index': 398, 'word': 'we', 'start': 1666, 'end': 1668}, {'entity': 'LABEL_1', 'score': 0.5359309, 'index': 399, 'word': 'decided', 'start': 1669, 'end': 1676}, {'entity': 'LABEL_1', 'score': 0.5115704, 'index': 400, 'word': 'to', 'start': 1677, 'end': 1679}, {'entity': 'LABEL_1', 'score': 0.51936895, 'index': 401, 'word': 'take', 'start': 1680, 'end': 1684}, {'entity': 'LABEL_1', 'score': 0.5070548, 'index': 402, 'word': 'another', 
'start': 1685, 'end': 1692}, {'entity': 'LABEL_0', 'score': 0.5236533, 'index': 403, 'word': 'picture', 'start': 1693, 'end': 1700}, {'entity': 'LABEL_1', 'score': 0.57663405, 'index': 404, 'word': ',', 'start': 1700, 'end': 1701}, {'entity': 'LABEL_1', 'score': 0.6072539, 'index': 405, 'word': 'making', 'start': 1702, 'end': 1708}, {'entity': 'LABEL_1', 'score': 0.5408605, 'index': 406, 'word': 'sure', 'start': 1709, 'end': 1713}, {'entity': 'LABEL_0', 'score': 0.538262, 'index': 407, 'word': 'it', 'start': 1714, 'end': 1716}, {'entity': 'LABEL_0', 'score': 0.5418676, 'index': 408, 'word': 'was', 'start': 1717, 'end': 1720}, {'entity': 'LABEL_0', 'score': 0.6419493, 'index': 409, 'word': 'a', 'start': 1721, 'end': 1722}, {'entity': 'LABEL_0', 'score': 0.61665875, 'index': 410, 'word': 'cloud', 'start': 1723, 'end': 1728}, {'entity': 'LABEL_0', 'score': 0.65615964, 'index': 411, 'word': '##less', 'start': 1728, 'end': 1732}, {'entity': 'LABEL_0', 'score': 0.65491146, 'index': 412, 'word': 'summer', 'start': 1733, 'end': 1739}, {'entity': 'LABEL_0', 'score': 0.68787396, 'index': 413, 'word': 'day', 'start': 1740, 'end': 1743}, {'entity': 'LABEL_0', 'score': 0.6984638, 'index': 414, 'word': '.', 'start': 1743, 'end': 1744}, {'entity': 'LABEL_1', 'score': 0.67899203, 'index': 415, 'word': 'Mali', 'start': 1745, 'end': 1749}, {'entity': 'LABEL_1', 'score': 0.6550556, 'index': 416, 'word': '##n', 'start': 1749, 'end': 1750}, {'entity': 'LABEL_1', 'score': 0.66404665, 'index': 417, 'word': \"'\", 'start': 1750, 'end': 1751}, {'entity': 'LABEL_1', 'score': 0.6497539, 'index': 418, 'word': 's', 'start': 1751, 'end': 1752}, {'entity': 'LABEL_1', 'score': 0.6664745, 'index': 419, 'word': 'team', 'start': 1753, 'end': 1757}, {'entity': 'LABEL_1', 'score': 0.5993111, 'index': 420, 'word': 'captured', 'start': 1758, 'end': 1766}, {'entity': 'LABEL_1', 'score': 0.54670244, 'index': 421, 'word': 'an', 'start': 1767, 'end': 1769}, {'entity': 'LABEL_1', 'score': 0.5774647, 'index': 
422, 'word': 'ama', 'start': 1770, 'end': 1773}, {'entity': 'LABEL_1', 'score': 0.561597, 'index': 423, 'word': '##zing', 'start': 1773, 'end': 1777}, {'entity': 'LABEL_1', 'score': 0.5217397, 'index': 424, 'word': 'photo', 'start': 1778, 'end': 1783}, {'entity': 'LABEL_1', 'score': 0.5875206, 'index': 425, 'word': 'using', 'start': 1784, 'end': 1789}, {'entity': 'LABEL_1', 'score': 0.5692844, 'index': 426, 'word': 'the', 'start': 1790, 'end': 1793}, {'entity': 'LABEL_1', 'score': 0.5843616, 'index': 427, 'word': 'camera', 'start': 1794, 'end': 1800}, {'entity': 'LABEL_1', 'score': 0.5179869, 'index': 428, 'word': \"'\", 'start': 1800, 'end': 1801}, {'entity': 'LABEL_1', 'score': 0.50458616, 'index': 429, 'word': 's', 'start': 1801, 'end': 1802}, {'entity': 'LABEL_1', 'score': 0.50684035, 'index': 430, 'word': 'absolute', 'start': 1803, 'end': 1811}, {'entity': 'LABEL_1', 'score': 0.52807856, 'index': 431, 'word': 'maximum', 'start': 1812, 'end': 1819}, {'entity': 'LABEL_1', 'score': 0.51326877, 'index': 432, 'word': 'revolution', 'start': 1820, 'end': 1830}, {'entity': 'LABEL_1', 'score': 0.55913496, 'index': 433, 'word': '.', 'start': 1830, 'end': 1831}, {'entity': 'LABEL_1', 'score': 0.6561321, 'index': 434, 'word': 'With', 'start': 1832, 'end': 1836}, {'entity': 'LABEL_1', 'score': 0.6042771, 'index': 435, 'word': 'this', 'start': 1837, 'end': 1841}, {'entity': 'LABEL_1', 'score': 0.62364846, 'index': 436, 'word': 'camera', 'start': 1842, 'end': 1848}, {'entity': 'LABEL_1', 'score': 0.7165416, 'index': 437, 'word': 'you', 'start': 1849, 'end': 1852}, {'entity': 'LABEL_1', 'score': 0.72462106, 'index': 438, 'word': 'can', 'start': 1853, 'end': 1856}, {'entity': 'LABEL_1', 'score': 0.5962723, 'index': 439, 'word': 'disc', 'start': 1857, 'end': 1861}, {'entity': 'LABEL_1', 'score': 0.5619629, 'index': 440, 'word': '##ern', 'start': 1861, 'end': 1864}, {'entity': 'LABEL_1', 'score': 0.6288542, 'index': 441, 'word': 'things', 'start': 1865, 'end': 1871}, {'entity': 
'LABEL_1', 'score': 0.6048734, 'index': 442, 'word': 'in', 'start': 1872, 'end': 1874}, {'entity': 'LABEL_1', 'score': 0.58315647, 'index': 443, 'word': 'a', 'start': 1875, 'end': 1876}, {'entity': 'LABEL_1', 'score': 0.6455242, 'index': 444, 'word': 'digital', 'start': 1877, 'end': 1884}, {'entity': 'LABEL_1', 'score': 0.61327535, 'index': 445, 'word': 'image', 'start': 1885, 'end': 1890}, {'entity': 'LABEL_1', 'score': 0.6229006, 'index': 446, 'word': ',', 'start': 1890, 'end': 1891}, {'entity': 'LABEL_1', 'score': 0.5079185, 'index': 447, 'word': '3', 'start': 1892, 'end': 1893}, {'entity': 'LABEL_1', 'score': 0.5326037, 'index': 448, 'word': 'times', 'start': 1894, 'end': 1899}, {'entity': 'LABEL_1', 'score': 0.53250307, 'index': 449, 'word': 'bigger', 'start': 1900, 'end': 1906}, {'entity': 'LABEL_0', 'score': 0.5356584, 'index': 450, 'word': 'than', 'start': 1907, 'end': 1911}, {'entity': 'LABEL_0', 'score': 0.5484522, 'index': 451, 'word': 'the', 'start': 1912, 'end': 1915}, {'entity': 'LABEL_1', 'score': 0.5069752, 'index': 452, 'word': 'pi', 'start': 1916, 'end': 1918}, {'entity': 'LABEL_1', 'score': 0.53663635, 'index': 453, 'word': '##xel', 'start': 1918, 'end': 1921}, {'entity': 'LABEL_0', 'score': 0.5043374, 'index': 454, 'word': 'size', 'start': 1922, 'end': 1926}, {'entity': 'LABEL_1', 'score': 0.50889504, 'index': 455, 'word': 'which', 'start': 1927, 'end': 1932}, {'entity': 'LABEL_1', 'score': 0.5591767, 'index': 456, 'word': 'means', 'start': 1933, 'end': 1938}, {'entity': 'LABEL_1', 'score': 0.6113632, 'index': 457, 'word': 'if', 'start': 1939, 'end': 1941}, {'entity': 'LABEL_0', 'score': 0.5393998, 'index': 458, 'word': 'there', 'start': 1942, 'end': 1947}, {'entity': 'LABEL_1', 'score': 0.5202561, 'index': 459, 'word': 'were', 'start': 1948, 'end': 1952}, {'entity': 'LABEL_1', 'score': 0.5006454, 'index': 460, 'word': 'any', 'start': 1953, 'end': 1956}, {'entity': 'LABEL_0', 'score': 0.54660577, 'index': 461, 'word': 'signs', 'start': 1957, 
'end': 1962}, {'entity': 'LABEL_0', 'score': 0.6116034, 'index': 462, 'word': 'of', 'start': 1963, 'end': 1965}, {'entity': 'LABEL_0', 'score': 0.6801252, 'index': 463, 'word': 'life', 'start': 1966, 'end': 1970}, {'entity': 'LABEL_1', 'score': 0.52473795, 'index': 464, 'word': ',', 'start': 1970, 'end': 1971}, {'entity': 'LABEL_1', 'score': 0.69474596, 'index': 465, 'word': 'you', 'start': 1972, 'end': 1975}, {'entity': 'LABEL_1', 'score': 0.7024561, 'index': 466, 'word': 'could', 'start': 1976, 'end': 1981}, {'entity': 'LABEL_1', 'score': 0.63681555, 'index': 467, 'word': 'easily', 'start': 1982, 'end': 1988}, {'entity': 'LABEL_1', 'score': 0.5592056, 'index': 468, 'word': 'see', 'start': 1989, 'end': 1992}, {'entity': 'LABEL_1', 'score': 0.5347693, 'index': 469, 'word': 'what', 'start': 1993, 'end': 1997}, {'entity': 'LABEL_1', 'score': 0.55739254, 'index': 470, 'word': 'they', 'start': 1998, 'end': 2002}, {'entity': 'LABEL_1', 'score': 0.577428, 'index': 471, 'word': 'were', 'start': 2003, 'end': 2007}, {'entity': 'LABEL_0', 'score': 0.50032717, 'index': 472, 'word': '.', 'start': 2007, 'end': 2008}, {'entity': 'LABEL_1', 'score': 0.5744039, 'index': 473, 'word': 'What', 'start': 2009, 'end': 2013}, {'entity': 'LABEL_1', 'score': 0.51382345, 'index': 474, 'word': 'the', 'start': 2014, 'end': 2017}, {'entity': 'LABEL_1', 'score': 0.5102362, 'index': 475, 'word': 'picture', 'start': 2018, 'end': 2025}, {'entity': 'LABEL_1', 'score': 0.5715643, 'index': 476, 'word': 'showed', 'start': 2026, 'end': 2032}, {'entity': 'LABEL_1', 'score': 0.54575837, 'index': 477, 'word': 'was', 'start': 2033, 'end': 2036}, {'entity': 'LABEL_0', 'score': 0.604859, 'index': 478, 'word': 'the', 'start': 2037, 'end': 2040}, {'entity': 'LABEL_0', 'score': 0.5699074, 'index': 479, 'word': 'but', 'start': 2041, 'end': 2044}, {'entity': 'LABEL_0', 'score': 0.5952769, 'index': 480, 'word': '##te', 'start': 2044, 'end': 2046}, {'entity': 'LABEL_0', 'score': 0.6050275, 'index': 481, 'word': 
'or', 'start': 2047, 'end': 2049}, {'entity': 'LABEL_0', 'score': 0.53718954, 'index': 482, 'word': 'mesa', 'start': 2050, 'end': 2054}, {'entity': 'LABEL_0', 'score': 0.54129803, 'index': 483, 'word': ',', 'start': 2054, 'end': 2055}, {'entity': 'LABEL_1', 'score': 0.5079675, 'index': 484, 'word': 'which', 'start': 2056, 'end': 2061}, {'entity': 'LABEL_0', 'score': 0.5088303, 'index': 485, 'word': 'are', 'start': 2062, 'end': 2065}, {'entity': 'LABEL_0', 'score': 0.552098, 'index': 486, 'word': 'land', 'start': 2066, 'end': 2070}, {'entity': 'LABEL_0', 'score': 0.5434629, 'index': 487, 'word': '##form', 'start': 2070, 'end': 2074}, {'entity': 'LABEL_0', 'score': 0.56481916, 'index': 488, 'word': '##s', 'start': 2074, 'end': 2075}, {'entity': 'LABEL_0', 'score': 0.52328765, 'index': 489, 'word': 'common', 'start': 2076, 'end': 2082}, {'entity': 'LABEL_0', 'score': 0.5769157, 'index': 490, 'word': 'around', 'start': 2083, 'end': 2089}, {'entity': 'LABEL_0', 'score': 0.58156013, 'index': 491, 'word': 'the', 'start': 2090, 'end': 2093}, {'entity': 'LABEL_0', 'score': 0.5761414, 'index': 492, 'word': 'American', 'start': 2094, 'end': 2102}, {'entity': 'LABEL_0', 'score': 0.60019207, 'index': 493, 'word': 'West', 'start': 2103, 'end': 2107}, {'entity': 'LABEL_0', 'score': 0.56151944, 'index': 494, 'word': '.', 'start': 2107, 'end': 2108}]\n"
]
}
],
"source": [
"import json\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"numind/NuNER-multilingual-v0.1\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"numind/NuNER-multilingual-v0.1\")\n",
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"salida=classifier(text)\n",
"print(salida)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "10ba9112-0180-4cd3-8d4b-b548101382fd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"LABEL_0 188\n",
"LABEL_1 306\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"LABEL_0 1\n",
" ##ara 1\n",
" ##con 1\n",
" ##er 1\n",
" ##form 3\n",
" ##ian 1\n",
" ##ing 1\n",
" ##ion 1\n",
" ##less 1\n",
" ##ling 1\n",
" ##oce 1\n",
" ##oh 1\n",
" ##onia 1\n",
" ##or 1\n",
" ##out 1\n",
" ##rc 1\n",
" ##ry 1\n",
" ##s 3\n",
" ##te 1\n",
" ##yd 1\n",
" ##zation 1\n",
" ##ze 1\n",
" , 8\n",
" . 9\n",
" 1 1\n",
" 25 1\n",
" 5 1\n",
" 8 1\n",
" American 1\n",
" April 2\n",
" C 1\n",
" Egypt 1\n",
" Mars 4\n",
" Mart 1\n",
" Ph 1\n",
" Very 1\n",
" Viking 1\n",
" West 1\n",
" a 6\n",
" about 1\n",
" ago 1\n",
" alien 1\n",
" an 1\n",
" ancient 1\n",
" another 1\n",
" are 1\n",
" around 2\n",
" attention 1\n",
" books 1\n",
" but 1\n",
" by 1\n",
" check 1\n",
" ci 1\n",
" civili 1\n",
" cloud 1\n",
" common 2\n",
" day 1\n",
" days 1\n",
" evidence 1\n",
" few 1\n",
" five 1\n",
" for 1\n",
" formation 1\n",
" good 1\n",
" gr 1\n",
" ha 1\n",
" huge 1\n",
" i 1\n",
" in 1\n",
" is 1\n",
" it 3\n",
" land 3\n",
" later 1\n",
" life 3\n",
" like 1\n",
" lines 1\n",
" look 1\n",
" magazines 1\n",
" meant 1\n",
" mesa 2\n",
" monument 1\n",
" natural 2\n",
" no 1\n",
" note 1\n",
" of 4\n",
" on 4\n",
" or 1\n",
" original 1\n",
" photos 1\n",
" picture 3\n",
" planet 1\n",
" pop 1\n",
" public 1\n",
" radio 1\n",
" reveal 1\n",
" rock 1\n",
" rum 1\n",
" sharp 1\n",
" shows 1\n",
" signs 1\n",
" size 1\n",
" spacecraft 1\n",
" started 1\n",
" store 1\n",
" story 1\n",
" summer 1\n",
" sure 1\n",
" talk 1\n",
" ten 1\n",
" than 2\n",
" that 5\n",
" the 10\n",
" there 3\n",
" times 1\n",
" to 3\n",
" took 1\n",
" twenty 1\n",
" was 3\n",
" way 1\n",
" which 1\n",
" years 2\n",
"LABEL_1 8\n",
" ##adow 3\n",
" ##biter 1\n",
" ##ct 1\n",
" ##d 2\n",
" ##emble 1\n",
" ##ern 1\n",
" ##ly 1\n",
" ##n 3\n",
" ##ness 1\n",
" ##nted 1\n",
" ##pping 1\n",
" ##rra 1\n",
" ##s 4\n",
" ##xel 1\n",
" ##y 1\n",
" ##zing 1\n",
" , 26\n",
" - 2\n",
" . 3\n",
" 1998 1\n",
" 2001 1\n",
" 3 1\n",
" ; 1\n",
" ? 1\n",
" But 1\n",
" Face 1\n",
" Mali 2\n",
" Mars 3\n",
" Michael 1\n",
" NASA 3\n",
" No 1\n",
" On 1\n",
" Or 1\n",
" So 1\n",
" Some 1\n",
" The 1\n",
" Us 1\n",
" Viking 1\n",
" We 2\n",
" Well 1\n",
" What 1\n",
" With 1\n",
" a 4\n",
" able 1\n",
" absolute 1\n",
" alien 2\n",
" all 3\n",
" ama 1\n",
" an 1\n",
" and 8\n",
" announced 1\n",
" another 2\n",
" any 1\n",
" appeared 1\n",
" at 2\n",
" be 2\n",
" became 1\n",
" because 1\n",
" bigger 1\n",
" budget 1\n",
" but 3\n",
" by 2\n",
" camera 3\n",
" can 1\n",
" captured 1\n",
" clear 1\n",
" correct 1\n",
" could 2\n",
" created 1\n",
" decided 2\n",
" defender 1\n",
" did 1\n",
" digital 1\n",
" disc 1\n",
" easily 1\n",
" engage 1\n",
" evidence 1\n",
" face 4\n",
" figure 1\n",
" findings 1\n",
" for 1\n",
" formed 1\n",
" had 1\n",
" hau 1\n",
" head 1\n",
" hidden 1\n",
" hide 1\n",
" his 1\n",
" human 2\n",
" if 2\n",
" image 2\n",
" in 2\n",
" is 1\n",
" it 7\n",
" just 3\n",
" like 1\n",
" made 2\n",
" make 1\n",
" making 1\n",
" marking 1\n",
" maximum 1\n",
" me 1\n",
" mean 1\n",
" means 1\n",
" movies 1\n",
" no 1\n",
" obvious 1\n",
" of 1\n",
" on 2\n",
" one 1\n",
" only 2\n",
" our 1\n",
" out 1\n",
" people 1\n",
" photo 1\n",
" photos 1\n",
" pi 1\n",
" picture 1\n",
" prove 1\n",
" re 1\n",
" really 1\n",
" res 1\n",
" revealed 1\n",
" revolution 1\n",
" s 3\n",
" scientist 1\n",
" scientists 2\n",
" see 2\n",
" sh 3\n",
" shot 2\n",
" should 1\n",
" showed 1\n",
" sna 1\n",
" soon 1\n",
" spotted 1\n",
" sure 2\n",
" t 2\n",
" take 2\n",
" team 2\n",
" tell 1\n",
" that 5\n",
" the 6\n",
" them 1\n",
" they 1\n",
" things 1\n",
" this 2\n",
" thought 2\n",
" times 1\n",
" to 7\n",
" us 1\n",
" using 1\n",
" very 1\n",
" wanted 1\n",
" was 7\n",
" wasn 1\n",
" we 5\n",
" were 4\n",
" what 1\n",
" when 1\n",
" which 4\n",
" whole 1\n",
" wish 1\n",
" with 1\n",
" would 1\n",
" wrong 2\n",
" ye 1\n",
" you 4\n",
"dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"29 numindNuNER-multilingual-v0.1.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "3c3e4378-8c3b-4848-b7d5-8e370ae393ee",
"metadata": {},
"source": [
"## 30 orgcatorg/bert-base-multilingual-cased-ner"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "c780e0af-aa32-44a2-b519-dea67c5b5788",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.99468493,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.99335337,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9839153,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9820802,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9102487,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.8671783,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.7123952,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.94853634,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.96879715,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9677627,\n",
" 'index': 125,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9969103,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99837106,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9981609,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9981694,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.99742895,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.9966016,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9417412,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.96071965,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/bert-base-multilingual-cased-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/bert-base-multilingual-cased-ner\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "36bc9e11-1524-4e9f-a21b-e5dea172a26f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 4\n",
"B-ORG 2\n",
"B-PER 3\n",
"I-LOC 1\n",
"I-ORG 3\n",
"I-PER 5\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC ##onia 1\n",
" ##yd 1\n",
" American 1\n",
" C 1\n",
"B-ORG Mars 1\n",
" Viking 1\n",
"B-PER ##ion 1\n",
" Egypt 1\n",
" Michael 1\n",
"I-LOC West 1\n",
"I-ORG ##biter 1\n",
" 1 1\n",
" Or 1\n",
"I-PER ##ara 1\n",
" ##n 1\n",
" ##oh 1\n",
" Mali 1\n",
" Ph 1\n",
"dtype: int64"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"30 orgcatorgbert-base-multilingual-cased-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "01668401-aae4-4edb-beca-5158043923da",
"metadata": {},
"source": [
"## 31 orgcatorg/xlm-roberta-base-ner"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "7218c33d-66a7-4aee-a1d6-aab2d0f53b4a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.9307573,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.55268466,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.8743138,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8924012,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8749563,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.83837587,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.31897616,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.66254807,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6462039,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6156382,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.5166316,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.5068441,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.51523536,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.5124219,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.54218507,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9100919,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.89008635,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.8640533,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.913112,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.88857234,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.85235375,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.50641507,\n",
" 'index': 478,\n",
" 'word': '▁but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.8740346,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.78719395,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/xlm-roberta-base-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/xlm-roberta-base-ner\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "c54964d5-041e-4fc7-989f-07f45279adaf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 5\n",
"B-ORG 6\n",
"B-PER 1\n",
"I-LOC 1\n",
"I-ORG 10\n",
"I-PER 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC do 1\n",
" nia 1\n",
" ▁American 1\n",
" ▁Cy 1\n",
" ▁but 1\n",
"B-ORG ion 1\n",
" ▁Egypt 1\n",
" ▁Mars 1\n",
" ▁Marti 1\n",
" ▁NASA 1\n",
" ▁Viking 1\n",
"B-PER ▁Michael 1\n",
"I-LOC ▁West 1\n",
"I-ORG a 1\n",
" bit 1\n",
" craft 1\n",
" er 1\n",
" oh 1\n",
" ▁1 1\n",
" ▁Or 1\n",
" ▁Phar 1\n",
" ▁scientist 1\n",
" ▁space 1\n",
"I-PER ▁Malin 1\n",
"dtype: int64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"31 orgcatorgxlm-roberta-base-ner.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "55c388f1-41aa-451a-928e-41acfe12639d",
"metadata": {},
"source": [
"## 32 orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "34af1035-7a09-485b-9663-057506e508ac",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.9218857,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.9510029,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8217206,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.96086377,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.9884774,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.9854343,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.97897524,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.8928362,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.83326113,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8212351,\n",
" 'index': 99,\n",
" 'word': '▁mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.86985016,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.87044686,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.70206004,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8018863,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8662057,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.8813573,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.90332484,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8650216,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.79992837,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.8160805,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346}]"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "8f0dab43-9fd8-4a0f-a3b4-c936f4ab9d25",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-ORG 6\n",
"B-PER 1\n",
"I-ORG 12\n",
"I-PER 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-ORG an 1\n",
" ion 1\n",
" ▁Egypt 1\n",
" ▁Marti 1\n",
" ▁NASA 1\n",
" ▁Viking 1\n",
"B-PER ▁Michael 1\n",
"I-ORG a 1\n",
" bit 1\n",
" craft 1\n",
" er 1\n",
" oh 1\n",
" ▁1 1\n",
" ▁Mars 1\n",
" ▁Or 1\n",
" ▁Phar 1\n",
" ▁mesa 1\n",
" ▁scientist 1\n",
" ▁space 1\n",
"I-PER ▁Malin 1\n",
"dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"32 orgcatorgEntityCS-39-PEP_MS_MLM-xlmr-base.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "4b343a33-2ae7-441a-8ff2-7b10dcd5611d",
"metadata": {},
"source": [
"## 33 igorsterner/xlmr-multilingual-sentence-segmentation"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "3ae7c05d-6f10-4666-92dc-6a3483c40afb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': '|',\n",
" 'score': 0.9995815,\n",
" 'index': 49,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': '|',\n",
" 'score': 0.9974347,\n",
" 'index': 85,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': '|',\n",
" 'score': 0.9996898,\n",
" 'index': 124,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': '|',\n",
" 'score': 0.9997476,\n",
" 'index': 174,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': '|',\n",
" 'score': 0.9997371,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': '|',\n",
" 'score': 0.9997018,\n",
" 'index': 251,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': '|',\n",
" 'score': 0.9995659,\n",
" 'index': 296,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': '|',\n",
" 'score': 0.9988242,\n",
" 'index': 356,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': '|',\n",
" 'score': 0.9983346,\n",
" 'index': 380,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': '|',\n",
" 'score': 0.999102,\n",
" 'index': 415,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': '|',\n",
" 'score': 0.99834883,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': '|',\n",
" 'score': 0.99967754,\n",
" 'index': 471,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': '|',\n",
" 'score': 0.99990845,\n",
" 'index': 493,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"igorsterner/xlmr-multilingual-sentence-segmentation\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"igorsterner/xlmr-multilingual-sentence-segmentation\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "32e7cb9f-a799-4227-a642-ef9af8602b15",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"| 13\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word\n",
"| 1\n",
" . 11\n",
" ? 1\n",
"dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"33 igorsternerxlmr-multilingual-sentence-segmentation.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "1fbccdc4-eafd-485d-bdd0-500d606e6429",
"metadata": {},
"source": [
"## 34 mukowaty/punctuate-16"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "997199dd-2e7f-43ee-8dc4-b5189ef1da47",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': '0',\n",
" 'score': 0.77220947,\n",
" 'index': 1,\n",
" 'word': '▁So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': '0',\n",
" 'score': 0.9685962,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': '0',\n",
" 'score': 0.99998283,\n",
" 'index': 3,\n",
" 'word': '▁if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': '0',\n",
" 'score': 0.99998665,\n",
" 'index': 4,\n",
" 'word': '▁you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': '0',\n",
" 'score': 0.99998367,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': '0',\n",
" 'score': 0.99998736,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': '0',\n",
" 'score': 0.99998903,\n",
" 'index': 7,\n",
" 'word': '▁a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': '0',\n",
" 'score': 0.9999871,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': '0',\n",
" 'score': 0.8388357,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': '0',\n",
" 'score': 0.9971527,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': '0',\n",
" 'score': 0.9999814,\n",
" 'index': 11,\n",
" 'word': '▁you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': '0',\n",
" 'score': 0.999954,\n",
" 'index': 12,\n",
" 'word': '▁should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': '0',\n",
" 'score': 0.9999802,\n",
" 'index': 13,\n",
" 'word': '▁be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': '0',\n",
" 'score': 0.99997485,\n",
" 'index': 14,\n",
" 'word': '▁able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': '0',\n",
" 'score': 0.9999815,\n",
" 'index': 15,\n",
" 'word': '▁to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': '0',\n",
" 'score': 0.9999883,\n",
" 'index': 16,\n",
" 'word': '▁tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': '0',\n",
" 'score': 0.9998753,\n",
" 'index': 17,\n",
" 'word': '▁me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': '0',\n",
" 'score': 0.9999894,\n",
" 'index': 18,\n",
" 'word': '▁the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': '0',\n",
" 'score': 0.9999883,\n",
" 'index': 19,\n",
" 'word': '▁whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': '0',\n",
" 'score': 0.9985863,\n",
" 'index': 20,\n",
" 'word': '▁story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': '0',\n",
" 'score': 0.99981517,\n",
" 'index': 21,\n",
" 'word': '▁about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': '0',\n",
" 'score': 0.9999635,\n",
" 'index': 22,\n",
" 'word': '▁the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': '0',\n",
" 'score': 0.9999056,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': '0',\n",
" 'score': 0.9999746,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': '0',\n",
" 'score': 0.92461413,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': '0',\n",
" 'score': 0.98427933,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': '0',\n",
" 'score': 0.85343385,\n",
" 'index': 27,\n",
" 'word': '▁which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': '0',\n",
" 'score': 0.837885,\n",
" 'index': 28,\n",
" 'word': '▁obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': '0',\n",
" 'score': 0.9999728,\n",
" 'index': 29,\n",
" 'word': '▁is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': '0',\n",
" 'score': 0.9998778,\n",
" 'index': 30,\n",
" 'word': '▁evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': '0',\n",
" 'score': 0.9999541,\n",
" 'index': 31,\n",
" 'word': '▁that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': '0',\n",
" 'score': 0.9999889,\n",
" 'index': 32,\n",
" 'word': '▁there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': '0',\n",
" 'score': 0.9999783,\n",
" 'index': 33,\n",
" 'word': '▁is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': '0',\n",
" 'score': 0.9999826,\n",
" 'index': 34,\n",
" 'word': '▁life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': '0',\n",
" 'score': 0.9999865,\n",
" 'index': 35,\n",
" 'word': '▁on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': '0',\n",
" 'score': 0.9567498,\n",
" 'index': 36,\n",
" 'word': '▁Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': '0',\n",
" 'score': 0.9929469,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': '0',\n",
" 'score': 0.99991345,\n",
" 'index': 38,\n",
" 'word': '▁and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': '0',\n",
" 'score': 0.9999398,\n",
" 'index': 39,\n",
" 'word': '▁that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': '0',\n",
" 'score': 0.99998796,\n",
" 'index': 40,\n",
" 'word': '▁the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': '0',\n",
" 'score': 0.9999676,\n",
" 'index': 41,\n",
" 'word': '▁face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': '0',\n",
" 'score': 0.99997556,\n",
" 'index': 42,\n",
" 'word': '▁was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': '0',\n",
" 'score': 0.9999752,\n",
" 'index': 43,\n",
" 'word': '▁created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': '0',\n",
" 'score': 0.99997556,\n",
" 'index': 44,\n",
" 'word': '▁by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': '0',\n",
" 'score': 0.6871036,\n",
" 'index': 45,\n",
" 'word': '▁alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': '0',\n",
" 'score': 0.718696,\n",
" 'index': 46,\n",
" 'word': 's',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': '0',\n",
" 'score': 0.9882825,\n",
" 'index': 47,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': '0',\n",
" 'score': 0.7325032,\n",
" 'index': 48,\n",
" 'word': '▁correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': '.',\n",
" 'score': 0.6800837,\n",
" 'index': 49,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': ',',\n",
" 'score': 0.630742,\n",
" 'index': 50,\n",
" 'word': '▁No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': '0',\n",
" 'score': 0.537651,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': '0',\n",
" 'score': 0.9979886,\n",
" 'index': 52,\n",
" 'word': '▁twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': '0',\n",
" 'score': 0.9999819,\n",
" 'index': 53,\n",
" 'word': '▁five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': '0',\n",
" 'score': 0.99998283,\n",
" 'index': 54,\n",
" 'word': '▁years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': '0',\n",
" 'score': 0.9173825,\n",
" 'index': 55,\n",
" 'word': '▁ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': '0',\n",
" 'score': 0.82203543,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': '0',\n",
" 'score': 0.9999807,\n",
" 'index': 57,\n",
" 'word': '▁our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': '0',\n",
" 'score': 0.99979943,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': '0',\n",
" 'score': 0.99969375,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': '0',\n",
" 'score': 0.9997416,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': '0',\n",
" 'score': 0.9997236,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': '0',\n",
" 'score': 0.99925953,\n",
" 'index': 62,\n",
" 'word': '▁was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': '0',\n",
" 'score': 0.99998724,\n",
" 'index': 63,\n",
" 'word': '▁circ',\n",
" 'start': 264,\n",
" 'end': 268},\n",
" {'entity': '0',\n",
" 'score': 0.9999875,\n",
" 'index': 64,\n",
" 'word': 'ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': '0',\n",
" 'score': 0.9999896,\n",
" 'index': 65,\n",
" 'word': '▁the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': '0',\n",
" 'score': 0.9864899,\n",
" 'index': 66,\n",
" 'word': '▁planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': '0',\n",
" 'score': 0.9984669,\n",
" 'index': 67,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': '0',\n",
" 'score': 0.9999856,\n",
" 'index': 68,\n",
" 'word': '▁sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': '0',\n",
" 'score': 0.99998784,\n",
" 'index': 69,\n",
" 'word': 'pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': '0',\n",
" 'score': 0.95048034,\n",
" 'index': 70,\n",
" 'word': '▁photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': '0',\n",
" 'score': 0.9701614,\n",
" 'index': 71,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': '0',\n",
" 'score': 0.9999386,\n",
" 'index': 72,\n",
" 'word': '▁when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': '0',\n",
" 'score': 0.9999896,\n",
" 'index': 73,\n",
" 'word': '▁it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': '0',\n",
" 'score': 0.9999639,\n",
" 'index': 74,\n",
" 'word': '▁spot',\n",
" 'start': 310,\n",
" 'end': 314},\n",
" {'entity': '0',\n",
" 'score': 0.9999621,\n",
" 'index': 75,\n",
" 'word': 'ted',\n",
" 'start': 314,\n",
" 'end': 317},\n",
" {'entity': '0',\n",
" 'score': 0.9999682,\n",
" 'index': 76,\n",
" 'word': '▁the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': '0',\n",
" 'score': 0.9999527,\n",
" 'index': 77,\n",
" 'word': '▁shadow',\n",
" 'start': 322,\n",
" 'end': 328},\n",
" {'entity': '0',\n",
" 'score': 0.99995685,\n",
" 'index': 78,\n",
" 'word': 'y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': '0',\n",
" 'score': 0.9999789,\n",
" 'index': 79,\n",
" 'word': '▁like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': '0',\n",
" 'score': 0.9999783,\n",
" 'index': 80,\n",
" 'word': 'ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': '0',\n",
" 'score': 0.99998283,\n",
" 'index': 81,\n",
" 'word': '▁of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': '0',\n",
" 'score': 0.9999856,\n",
" 'index': 82,\n",
" 'word': '▁a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': '0',\n",
" 'score': 0.99998474,\n",
" 'index': 83,\n",
" 'word': '▁human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': '0',\n",
" 'score': 0.9708572,\n",
" 'index': 84,\n",
" 'word': '▁face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': '0',\n",
" 'score': 0.98933613,\n",
" 'index': 85,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': '0',\n",
" 'score': 0.9997725,\n",
" 'index': 86,\n",
" 'word': '▁Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': '0',\n",
" 'score': 0.9999485,\n",
" 'index': 87,\n",
" 'word': '▁scientist',\n",
" 'start': 359,\n",
" 'end': 368},\n",
" {'entity': '0',\n",
" 'score': 0.999949,\n",
" 'index': 88,\n",
" 'word': 's',\n",
" 'start': 368,\n",
" 'end': 369},\n",
" {'entity': '0',\n",
" 'score': 0.9999881,\n",
" 'index': 89,\n",
" 'word': '▁figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': '0',\n",
" 'score': 0.9999857,\n",
" 'index': 90,\n",
" 'word': 'd',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': '0',\n",
" 'score': 0.999859,\n",
" 'index': 91,\n",
" 'word': '▁out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': '0',\n",
" 'score': 0.9994584,\n",
" 'index': 92,\n",
" 'word': '▁that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': '0',\n",
" 'score': 0.9999875,\n",
" 'index': 93,\n",
" 'word': '▁it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': '0',\n",
" 'score': 0.99992657,\n",
" 'index': 94,\n",
" 'word': '▁was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': '0',\n",
" 'score': 0.9998423,\n",
" 'index': 95,\n",
" 'word': '▁just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': '0',\n",
" 'score': 0.99845207,\n",
" 'index': 96,\n",
" 'word': '▁another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': '0',\n",
" 'score': 0.99955803,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': '0',\n",
" 'score': 0.99972445,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': '0',\n",
" 'score': 0.95585614,\n",
" 'index': 99,\n",
" 'word': '▁mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': '0',\n",
" 'score': 0.94397455,\n",
" 'index': 100,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': '0',\n",
" 'score': 0.99993527,\n",
" 'index': 101,\n",
" 'word': '▁common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': '0',\n",
" 'score': 0.99996245,\n",
" 'index': 102,\n",
" 'word': '▁around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': '0',\n",
" 'score': 0.856652,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': '0',\n",
" 'score': 0.8526323,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': '0',\n",
" 'score': 0.83899707,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': '0',\n",
" 'score': 0.97646606,\n",
" 'index': 106,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': '0',\n",
" 'score': 0.9839803,\n",
" 'index': 107,\n",
" 'word': '▁only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': '0',\n",
" 'score': 0.99994636,\n",
" 'index': 108,\n",
" 'word': '▁this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': '0',\n",
" 'score': 0.99917173,\n",
" 'index': 109,\n",
" 'word': '▁one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': '0',\n",
" 'score': 0.9998348,\n",
" 'index': 110,\n",
" 'word': '▁had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': '0',\n",
" 'score': 0.9699841,\n",
" 'index': 111,\n",
" 'word': '▁shadow',\n",
" 'start': 462,\n",
" 'end': 468},\n",
" {'entity': '0',\n",
" 'score': 0.9711239,\n",
" 'index': 112,\n",
" 'word': 's',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': '0',\n",
" 'score': 0.9998492,\n",
" 'index': 113,\n",
" 'word': '▁that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': '0',\n",
" 'score': 0.99998796,\n",
" 'index': 114,\n",
" 'word': '▁made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': '0',\n",
" 'score': 0.99998903,\n",
" 'index': 115,\n",
" 'word': '▁it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': '0',\n",
" 'score': 0.9999857,\n",
" 'index': 116,\n",
" 'word': '▁look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': '0',\n",
" 'score': 0.9999777,\n",
" 'index': 117,\n",
" 'word': '▁like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': '0',\n",
" 'score': 0.9999589,\n",
" 'index': 118,\n",
" 'word': '▁an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': '0',\n",
" 'score': 0.99981254,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': '0',\n",
" 'score': 0.9998149,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': '0',\n",
" 'score': 0.82426316,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': '0',\n",
" 'score': 0.84575367,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': '0',\n",
" 'score': 0.8702296,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': '0',\n",
" 'score': 0.8167109,\n",
" 'index': 124,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': '0',\n",
" 'score': 0.99995625,\n",
" 'index': 125,\n",
" 'word': '▁Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': '0',\n",
" 'score': 0.99997604,\n",
" 'index': 126,\n",
" 'word': '▁few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': '0',\n",
" 'score': 0.9999708,\n",
" 'index': 127,\n",
" 'word': '▁days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': '0',\n",
" 'score': 0.8718785,\n",
" 'index': 128,\n",
" 'word': '▁later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': '0',\n",
" 'score': 0.89001435,\n",
" 'index': 129,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': '0',\n",
" 'score': 0.9999827,\n",
" 'index': 130,\n",
" 'word': '▁we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': '0',\n",
" 'score': 0.9999839,\n",
" 'index': 131,\n",
" 'word': '▁reveal',\n",
" 'start': 538,\n",
" 'end': 544},\n",
" {'entity': '0',\n",
" 'score': 0.99997723,\n",
" 'index': 132,\n",
" 'word': 'ed',\n",
" 'start': 544,\n",
" 'end': 546},\n",
" {'entity': '0',\n",
" 'score': 0.99999,\n",
" 'index': 133,\n",
" 'word': '▁the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': '0',\n",
" 'score': 0.99427587,\n",
" 'index': 134,\n",
" 'word': '▁image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': '0',\n",
" 'score': 0.9999856,\n",
" 'index': 135,\n",
" 'word': '▁for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': '0',\n",
" 'score': 0.99998295,\n",
" 'index': 136,\n",
" 'word': '▁all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': '0',\n",
" 'score': 0.9999893,\n",
" 'index': 137,\n",
" 'word': '▁to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': '0',\n",
" 'score': 0.946914,\n",
" 'index': 138,\n",
" 'word': '▁see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': '0',\n",
" 'score': 0.987288,\n",
" 'index': 139,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': '0',\n",
" 'score': 0.999188,\n",
" 'index': 140,\n",
" 'word': '▁and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': '0',\n",
" 'score': 0.9999727,\n",
" 'index': 141,\n",
" 'word': '▁we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': '0',\n",
" 'score': 0.99997914,\n",
" 'index': 142,\n",
" 'word': '▁made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': '0',\n",
" 'score': 0.9997273,\n",
" 'index': 143,\n",
" 'word': '▁sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': '0',\n",
" 'score': 0.9999852,\n",
" 'index': 144,\n",
" 'word': '▁to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': '0',\n",
" 'score': 0.9989028,\n",
" 'index': 145,\n",
" 'word': '▁note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': '0',\n",
" 'score': 0.99538165,\n",
" 'index': 146,\n",
" 'word': '▁that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': '0',\n",
" 'score': 0.9999857,\n",
" 'index': 147,\n",
" 'word': '▁it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': '0',\n",
" 'score': 0.9998722,\n",
" 'index': 148,\n",
" 'word': '▁was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': '0',\n",
" 'score': 0.9999764,\n",
" 'index': 149,\n",
" 'word': '▁a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': '0',\n",
" 'score': 0.99972886,\n",
" 'index': 150,\n",
" 'word': '▁huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': '0',\n",
" 'score': 0.99993753,\n",
" 'index': 151,\n",
" 'word': '▁rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': '0',\n",
" 'score': 0.98856163,\n",
" 'index': 152,\n",
" 'word': '▁formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': '0',\n",
" 'score': 0.9999294,\n",
" 'index': 153,\n",
" 'word': '▁that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': '0',\n",
" 'score': 0.99995327,\n",
" 'index': 154,\n",
" 'word': '▁just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': '0',\n",
" 'score': 0.999966,\n",
" 'index': 155,\n",
" 'word': '▁rese',\n",
" 'start': 642,\n",
" 'end': 646},\n",
" {'entity': '0',\n",
" 'score': 0.99995995,\n",
" 'index': 156,\n",
" 'word': 'mble',\n",
" 'start': 646,\n",
" 'end': 650},\n",
" {'entity': '0',\n",
" 'score': 0.99994195,\n",
" 'index': 157,\n",
" 'word': 'd',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': '0',\n",
" 'score': 0.99997365,\n",
" 'index': 158,\n",
" 'word': '▁a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': '0',\n",
" 'score': 0.9996898,\n",
" 'index': 159,\n",
" 'word': '▁human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': '0',\n",
" 'score': 0.99964654,\n",
" 'index': 160,\n",
" 'word': '▁head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': '0',\n",
" 'score': 0.9999701,\n",
" 'index': 161,\n",
" 'word': '▁and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': '0',\n",
" 'score': 0.6061729,\n",
" 'index': 162,\n",
" 'word': '▁face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': '0',\n",
" 'score': 0.9770811,\n",
" 'index': 163,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': '0',\n",
" 'score': 0.99860054,\n",
" 'index': 164,\n",
" 'word': '▁but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': '0',\n",
" 'score': 0.999987,\n",
" 'index': 165,\n",
" 'word': '▁all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': '0',\n",
" 'score': 0.99998677,\n",
" 'index': 166,\n",
" 'word': '▁of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': '0',\n",
" 'score': 0.99996483,\n",
" 'index': 167,\n",
" 'word': '▁it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': '0',\n",
" 'score': 0.9999796,\n",
" 'index': 168,\n",
" 'word': '▁was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': '0',\n",
" 'score': 0.99998987,\n",
" 'index': 169,\n",
" 'word': '▁for',\n",
" 'start': 693,\n",
" 'end': 696},\n",
" {'entity': '0',\n",
" 'score': 0.99998856,\n",
" 'index': 170,\n",
" 'word': 'med',\n",
" 'start': 696,\n",
" 'end': 699},\n",
" {'entity': '0',\n",
" 'score': 0.99998856,\n",
" 'index': 171,\n",
" 'word': '▁by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': '0',\n",
" 'score': 0.98339117,\n",
" 'index': 172,\n",
" 'word': '▁shadow',\n",
" 'start': 703,\n",
" 'end': 709},\n",
" {'entity': '0',\n",
" 'score': 0.98900115,\n",
" 'index': 173,\n",
" 'word': 's',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': '0',\n",
" 'score': 0.98375016,\n",
" 'index': 174,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': '0',\n",
" 'score': 0.9999788,\n",
" 'index': 175,\n",
" 'word': '▁We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': '0',\n",
" 'score': 0.99996126,\n",
" 'index': 176,\n",
" 'word': '▁only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': '0',\n",
" 'score': 0.9999857,\n",
" 'index': 177,\n",
" 'word': '▁announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': '0',\n",
" 'score': 0.97736925,\n",
" 'index': 178,\n",
" 'word': '▁it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': '0',\n",
" 'score': 0.99984276,\n",
" 'index': 179,\n",
" 'word': '▁because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': '0',\n",
" 'score': 0.99998534,\n",
" 'index': 180,\n",
" 'word': '▁we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': '0',\n",
" 'score': 0.9997471,\n",
" 'index': 181,\n",
" 'word': '▁thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': '0',\n",
" 'score': 0.9999869,\n",
" 'index': 182,\n",
" 'word': '▁it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': '0',\n",
" 'score': 0.9999846,\n",
" 'index': 183,\n",
" 'word': '▁would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': '0',\n",
" 'score': 0.99996984,\n",
" 'index': 184,\n",
" 'word': '▁be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': '0',\n",
" 'score': 0.99998915,\n",
" 'index': 185,\n",
" 'word': '▁a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': '0',\n",
" 'score': 0.99998903,\n",
" 'index': 186,\n",
" 'word': '▁good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': '0',\n",
" 'score': 0.9998764,\n",
" 'index': 187,\n",
" 'word': '▁way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': '0',\n",
" 'score': 0.99996376,\n",
" 'index': 188,\n",
" 'word': '▁to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': '0',\n",
" 'score': 0.9999871,\n",
" 'index': 189,\n",
" 'word': '▁engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': '0',\n",
" 'score': 0.99999,\n",
" 'index': 190,\n",
" 'word': '▁the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': '0',\n",
" 'score': 0.9999553,\n",
" 'index': 191,\n",
" 'word': '▁public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': '0',\n",
" 'score': 0.9999813,\n",
" 'index': 192,\n",
" 'word': '▁with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': '0',\n",
" 'score': 0.9999882,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': '0',\n",
" 'score': 0.9999893,\n",
" 'index': 194,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': '0',\n",
" 'score': 0.9999865,\n",
" 'index': 195,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': '0',\n",
" 'score': 0.99429613,\n",
" 'index': 196,\n",
" 'word': '▁finding',\n",
" 'start': 808,\n",
" 'end': 815},\n",
" {'entity': '0',\n",
" 'score': 0.9942463,\n",
" 'index': 197,\n",
" 'word': 's',\n",
" 'start': 815,\n",
" 'end': 816},\n",
" {'entity': '0',\n",
" 'score': 0.99662673,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': '0',\n",
" 'score': 0.9999397,\n",
" 'index': 199,\n",
" 'word': '▁and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': '0',\n",
" 'score': 0.99998295,\n",
" 'index': 200,\n",
" 'word': '▁at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': '0',\n",
" 'score': 0.9999753,\n",
" 'index': 201,\n",
" 'word': 'rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': '0',\n",
" 'score': 0.9999758,\n",
" 'index': 202,\n",
" 'word': 'ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': '0',\n",
" 'score': 0.9999795,\n",
" 'index': 203,\n",
" 'word': '▁attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': '0',\n",
" 'score': 0.99981695,\n",
" 'index': 204,\n",
" 'word': '▁to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': '0',\n",
" 'score': 0.9528797,\n",
" 'index': 205,\n",
" 'word': '▁Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': '0',\n",
" 'score': 0.9499835,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': '0',\n",
" 'score': 0.96378785,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': '0',\n",
" 'score': 0.9997296,\n",
" 'index': 208,\n",
" 'word': '▁and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': '0',\n",
" 'score': 0.999982,\n",
" 'index': 209,\n",
" 'word': '▁it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': '0',\n",
" 'score': 0.7720657,\n",
" 'index': 210,\n",
" 'word': '▁did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': '0',\n",
" 'score': 0.9128729,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': '0',\n",
" 'score': 0.99998784,\n",
" 'index': 212,\n",
" 'word': '▁The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': '0',\n",
" 'score': 0.9999536,\n",
" 'index': 213,\n",
" 'word': '▁face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': '0',\n",
" 'score': 0.99998736,\n",
" 'index': 214,\n",
" 'word': '▁on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': '0',\n",
" 'score': 0.9995109,\n",
" 'index': 215,\n",
" 'word': '▁Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': '0',\n",
" 'score': 0.9999553,\n",
" 'index': 216,\n",
" 'word': '▁soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': '0',\n",
" 'score': 0.9997906,\n",
" 'index': 217,\n",
" 'word': '▁became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': '0',\n",
" 'score': 0.9999752,\n",
" 'index': 218,\n",
" 'word': '▁a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': '0',\n",
" 'score': 0.9999167,\n",
" 'index': 219,\n",
" 'word': '▁pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': '0',\n",
" 'score': 0.9939778,\n",
" 'index': 220,\n",
" 'word': '▁icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': '0',\n",
" 'score': 0.97592837,\n",
" 'index': 221,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': '0',\n",
" 'score': 0.9999682,\n",
" 'index': 222,\n",
" 'word': '▁shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': '0',\n",
" 'score': 0.9999782,\n",
" 'index': 223,\n",
" 'word': '▁in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': '0',\n",
" 'score': 0.95145255,\n",
" 'index': 224,\n",
" 'word': '▁movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': '0',\n",
" 'score': 0.9942631,\n",
" 'index': 225,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': '0',\n",
" 'score': 0.9999585,\n",
" 'index': 226,\n",
" 'word': '▁appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': '0',\n",
" 'score': 0.9999505,\n",
" 'index': 227,\n",
" 'word': '▁in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': '0',\n",
" 'score': 0.99071115,\n",
" 'index': 228,\n",
" 'word': '▁books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': '0',\n",
" 'score': 0.996298,\n",
" 'index': 229,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': '0',\n",
" 'score': 0.96334654,\n",
" 'index': 230,\n",
" 'word': '▁magazine',\n",
" 'start': 939,\n",
" 'end': 947},\n",
" {'entity': '0',\n",
" 'score': 0.96163946,\n",
" 'index': 231,\n",
" 'word': 's',\n",
" 'start': 947,\n",
" 'end': 948},\n",
" {'entity': '0',\n",
" 'score': 0.99563783,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': '0',\n",
" 'score': 0.96375704,\n",
" 'index': 233,\n",
" 'word': '▁radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': '0',\n",
" 'score': 0.99991417,\n",
" 'index': 234,\n",
" 'word': '▁talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': '0',\n",
" 'score': 0.9309042,\n",
" 'index': 235,\n",
" 'word': '▁shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': '0',\n",
" 'score': 0.982239,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': '0',\n",
" 'score': 0.99977416,\n",
" 'index': 237,\n",
" 'word': '▁and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': '0',\n",
" 'score': 0.9998679,\n",
" 'index': 238,\n",
" 'word': '▁ha',\n",
" 'start': 972,\n",
" 'end': 974},\n",
" {'entity': '0',\n",
" 'score': 0.9998386,\n",
" 'index': 239,\n",
" 'word': 'un',\n",
" 'start': 974,\n",
" 'end': 976},\n",
" {'entity': '0',\n",
" 'score': 0.99986124,\n",
" 'index': 240,\n",
" 'word': 'ted',\n",
" 'start': 976,\n",
" 'end': 979},\n",
" {'entity': '0',\n",
" 'score': 0.9997329,\n",
" 'index': 241,\n",
" 'word': '▁gro',\n",
" 'start': 980,\n",
" 'end': 983},\n",
" {'entity': '0',\n",
" 'score': 0.99980766,\n",
" 'index': 242,\n",
" 'word': 'cer',\n",
" 'start': 983,\n",
" 'end': 986},\n",
" {'entity': '0',\n",
" 'score': 0.9998802,\n",
" 'index': 243,\n",
" 'word': 'y',\n",
" 'start': 986,\n",
" 'end': 987},\n",
" {'entity': '0',\n",
" 'score': 0.99945086,\n",
" 'index': 244,\n",
" 'word': '▁store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': '0',\n",
" 'score': 0.99998605,\n",
" 'index': 245,\n",
" 'word': '▁check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': '0',\n",
" 'score': 0.9999876,\n",
" 'index': 246,\n",
" 'word': 'out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': '0',\n",
" 'score': 0.9878735,\n",
" 'index': 247,\n",
" 'word': '▁lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': '0',\n",
" 'score': 0.999974,\n",
" 'index': 248,\n",
" 'word': '▁for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': '0',\n",
" 'score': 0.9999863,\n",
" 'index': 249,\n",
" 'word': '▁25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': '0',\n",
" 'score': 0.9723751,\n",
" 'index': 250,\n",
" 'word': '▁years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': '0',\n",
" 'score': 0.970573,\n",
" 'index': 251,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': '0',\n",
" 'score': 0.9999856,\n",
" 'index': 252,\n",
" 'word': '▁Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': '0',\n",
" 'score': 0.9999589,\n",
" 'index': 253,\n",
" 'word': '▁people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': '0',\n",
" 'score': 0.9989717,\n",
" 'index': 254,\n",
" 'word': '▁thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': '0',\n",
" 'score': 0.9999881,\n",
" 'index': 255,\n",
" 'word': '▁the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': '0',\n",
" 'score': 0.9999894,\n",
" 'index': 256,\n",
" 'word': '▁natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': '0',\n",
" 'score': 0.9999033,\n",
" 'index': 257,\n",
" 'word': '▁land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': '0',\n",
" 'score': 0.9999039,\n",
" 'index': 258,\n",
" 'word': 'form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': '0',\n",
" 'score': 0.9999722,\n",
" 'index': 259,\n",
" 'word': '▁was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': '0',\n",
" 'score': 0.9999889,\n",
" 'index': 260,\n",
" 'word': '▁evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': '0',\n",
" 'score': 0.9999753,\n",
" 'index': 261,\n",
" 'word': '▁of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': '0',\n",
" 'score': 0.9999536,\n",
" 'index': 262,\n",
" 'word': '▁life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': '0',\n",
" 'score': 0.9999864,\n",
" 'index': 263,\n",
" 'word': '▁on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': '0',\n",
" 'score': 0.96373415,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': '0',\n",
" 'score': 0.9778991,\n",
" 'index': 265,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': '0',\n",
" 'score': 0.9999219,\n",
" 'index': 266,\n",
" 'word': '▁and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': '0',\n",
" 'score': 0.9998994,\n",
" 'index': 267,\n",
" 'word': '▁that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': '0',\n",
" 'score': 0.9998392,\n",
" 'index': 268,\n",
" 'word': '▁us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': '0',\n",
" 'score': 0.9998784,\n",
" 'index': 269,\n",
" 'word': '▁scientist',\n",
" 'start': 1106,\n",
" 'end': 1115},\n",
" {'entity': '0',\n",
" 'score': 0.99990356,\n",
" 'index': 270,\n",
" 'word': 's',\n",
" 'start': 1115,\n",
" 'end': 1116},\n",
" {'entity': '0',\n",
" 'score': 0.9999876,\n",
" 'index': 271,\n",
" 'word': '▁wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': '0',\n",
" 'score': 0.99998724,\n",
" 'index': 272,\n",
" 'word': '▁to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': '0',\n",
" 'score': 0.99998665,\n",
" 'index': 273,\n",
" 'word': '▁hi',\n",
" 'start': 1127,\n",
" 'end': 1129},\n",
" {'entity': '0',\n",
" 'score': 0.9999869,\n",
" 'index': 274,\n",
" 'word': 'de',\n",
" 'start': 1129,\n",
" 'end': 1131},\n",
" {'entity': '0',\n",
" 'score': 0.6984844,\n",
" 'index': 275,\n",
" 'word': '▁it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': '0',\n",
" 'score': 0.93957794,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': '0',\n",
" 'score': 0.88728845,\n",
" 'index': 277,\n",
" 'word': '▁but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': '0',\n",
" 'score': 0.8514652,\n",
" 'index': 278,\n",
" 'word': '▁really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': '0',\n",
" 'score': 0.9391215,\n",
" 'index': 279,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': '0',\n",
" 'score': 0.99998224,\n",
" 'index': 280,\n",
" 'word': '▁the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': '0',\n",
" 'score': 0.9999875,\n",
" 'index': 281,\n",
" 'word': '▁defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': '0',\n",
" 'score': 0.9999875,\n",
" 'index': 282,\n",
" 'word': 's',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': '0',\n",
" 'score': 0.99997973,\n",
" 'index': 283,\n",
" 'word': '▁of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': '0',\n",
" 'score': 0.9999875,\n",
" 'index': 284,\n",
" 'word': '▁the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': '0',\n",
" 'score': 0.99969745,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': '0',\n",
" 'score': 0.9937796,\n",
" 'index': 286,\n",
" 'word': '▁budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': '0',\n",
" 'score': 0.9906232,\n",
" 'index': 287,\n",
" 'word': '▁wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': '0',\n",
" 'score': 0.99996674,\n",
" 'index': 288,\n",
" 'word': '▁there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': '0',\n",
" 'score': 0.99969625,\n",
" 'index': 289,\n",
" 'word': '▁was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': '0',\n",
" 'score': 0.99998605,\n",
" 'index': 290,\n",
" 'word': '▁an',\n",
" 'start': 1196,\n",
" 'end': 1198},\n",
" {'entity': '0',\n",
" 'score': 0.99998045,\n",
" 'index': 291,\n",
" 'word': 'cient',\n",
" 'start': 1198,\n",
" 'end': 1203},\n",
" {'entity': '0',\n",
" 'score': 0.9999665,\n",
" 'index': 292,\n",
" 'word': '▁civiliza',\n",
" 'start': 1204,\n",
" 'end': 1212},\n",
" {'entity': '0',\n",
" 'score': 0.99996734,\n",
" 'index': 293,\n",
" 'word': 'tion',\n",
" 'start': 1212,\n",
" 'end': 1216},\n",
" {'entity': '0',\n",
" 'score': 0.99998844,\n",
" 'index': 294,\n",
" 'word': '▁on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': '0',\n",
" 'score': 0.9755779,\n",
" 'index': 295,\n",
" 'word': '▁Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': '0',\n",
" 'score': 0.98278147,\n",
" 'index': 296,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': '0',\n",
" 'score': 0.9999839,\n",
" 'index': 297,\n",
" 'word': '▁We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': '0',\n",
" 'score': 0.9999614,\n",
" 'index': 298,\n",
" 'word': '▁decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': '0',\n",
" 'score': 0.99998903,\n",
" 'index': 299,\n",
" 'word': '▁to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': '0',\n",
" 'score': 0.9999876,\n",
" 'index': 300,\n",
" 'word': '▁take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': '0',\n",
" 'score': 0.9999888,\n",
" 'index': 301,\n",
" 'word': '▁another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': '0',\n",
" 'score': 0.576694,\n",
" 'index': 302,\n",
" 'word': '▁shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': '0',\n",
" 'score': 0.9998617,\n",
" 'index': 303,\n",
" 'word': '▁just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': '0',\n",
" 'score': 0.999987,\n",
" 'index': 304,\n",
" 'word': '▁to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': '0',\n",
" 'score': 0.99998534,\n",
" 'index': 305,\n",
" 'word': '▁make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': '0',\n",
" 'score': 0.99988437,\n",
" 'index': 306,\n",
" 'word': '▁sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': '0',\n",
" 'score': 0.9999877,\n",
" 'index': 307,\n",
" 'word': '▁we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': '0',\n",
" 'score': 0.9999863,\n",
" 'index': 308,\n",
" 'word': '▁were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': '0',\n",
" 'score': 0.9999697,\n",
" 'index': 309,\n",
" 'word': 'n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': '0',\n",
" 'score': 0.9999664,\n",
" 'index': 310,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': '0',\n",
" 'score': 0.99997973,\n",
" 'index': 311,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': '0',\n",
" 'score': 0.92863667,\n",
" 'index': 312,\n",
" 'word': '▁wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': '0',\n",
" 'score': 0.9716512,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': '0',\n",
" 'score': 0.99990296,\n",
" 'index': 314,\n",
" 'word': '▁on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': '0',\n",
" 'score': 0.99997044,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': '0',\n",
" 'score': 0.99988294,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': '0',\n",
" 'score': 0.999974,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': '.',\n",
" 'score': 0.95378405,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': '0',\n",
" 'score': 0.9999124,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': '0',\n",
" 'score': 0.98158044,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': '0',\n",
" 'score': 0.99995947,\n",
" 'index': 321,\n",
" 'word': '▁and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': '0',\n",
" 'score': 0.9999808,\n",
" 'index': 322,\n",
" 'word': '▁his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': '0',\n",
" 'score': 0.99976593,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': '0',\n",
" 'score': 0.99988925,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': '0',\n",
" 'score': 0.99988866,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': '0',\n",
" 'score': 0.99990416,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': '0',\n",
" 'score': 0.9998735,\n",
" 'index': 327,\n",
" 'word': '▁camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': '0',\n",
" 'score': 0.9773008,\n",
" 'index': 328,\n",
" 'word': '▁team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': '0',\n",
" 'score': 0.99995935,\n",
" 'index': 329,\n",
" 'word': '▁took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': '0',\n",
" 'score': 0.99998975,\n",
" 'index': 330,\n",
" 'word': '▁a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': '0',\n",
" 'score': 0.9995982,\n",
" 'index': 331,\n",
" 'word': '▁picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': '0',\n",
" 'score': 0.9999869,\n",
" 'index': 332,\n",
" 'word': '▁that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': '0',\n",
" 'score': 0.9999856,\n",
" 'index': 333,\n",
" 'word': '▁was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': '0',\n",
" 'score': 0.99999,\n",
" 'index': 334,\n",
" 'word': '▁ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': '0',\n",
" 'score': 0.9999894,\n",
" 'index': 335,\n",
" 'word': '▁times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': '0',\n",
" 'score': 0.99997914,\n",
" 'index': 336,\n",
" 'word': '▁sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': '0',\n",
" 'score': 0.99998355,\n",
" 'index': 337,\n",
" 'word': 'er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': '0',\n",
" 'score': 0.9999882,\n",
" 'index': 338,\n",
" 'word': '▁than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': '0',\n",
" 'score': 0.99998236,\n",
" 'index': 339,\n",
" 'word': '▁the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': '0',\n",
" 'score': 0.991712,\n",
" 'index': 340,\n",
" 'word': '▁original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': '0',\n",
" 'score': 0.99961096,\n",
" 'index': 341,\n",
" 'word': '▁Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': '0',\n",
" 'score': 0.9250188,\n",
" 'index': 342,\n",
" 'word': '▁photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': '0',\n",
" 'score': 0.9903465,\n",
" 'index': 343,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': '0',\n",
" 'score': 0.9999404,\n",
" 'index': 344,\n",
" 'word': '▁reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': '0',\n",
" 'score': 0.99993956,\n",
" 'index': 345,\n",
" 'word': 'ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': '0',\n",
" 'score': 0.9999809,\n",
" 'index': 346,\n",
" 'word': '▁a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': '0',\n",
" 'score': 0.99997795,\n",
" 'index': 347,\n",
" 'word': '▁natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': '0',\n",
" 'score': 0.93572664,\n",
" 'index': 348,\n",
" 'word': '▁land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': '0',\n",
" 'score': 0.94067293,\n",
" 'index': 349,\n",
" 'word': 'form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': '0',\n",
" 'score': 0.9671829,\n",
" 'index': 350,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': '0',\n",
" 'score': 0.99992955,\n",
" 'index': 351,\n",
" 'word': '▁which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': '0',\n",
" 'score': 0.9984102,\n",
" 'index': 352,\n",
" 'word': '▁meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': '0',\n",
" 'score': 0.9998084,\n",
" 'index': 353,\n",
" 'word': '▁no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': '0',\n",
" 'score': 0.99996555,\n",
" 'index': 354,\n",
" 'word': '▁alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': '0',\n",
" 'score': 0.9784937,\n",
" 'index': 355,\n",
" 'word': '▁monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': '0',\n",
" 'score': 0.9887007,\n",
" 'index': 356,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': '0',\n",
" 'score': 0.9980032,\n",
" 'index': 357,\n",
" 'word': '▁\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': '0',\n",
" 'score': 0.99696094,\n",
" 'index': 358,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': '0',\n",
" 'score': 0.99998856,\n",
" 'index': 359,\n",
" 'word': '▁that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': '0',\n",
" 'score': 0.9999746,\n",
" 'index': 360,\n",
" 'word': '▁picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': '0',\n",
" 'score': 0.9999877,\n",
" 'index': 361,\n",
" 'word': '▁wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': '0',\n",
" 'score': 0.9999833,\n",
" 'index': 362,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': '0',\n",
" 'score': 0.9999864,\n",
" 'index': 363,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': '0',\n",
" 'score': 0.9999876,\n",
" 'index': 364,\n",
" 'word': '▁very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': '0',\n",
" 'score': 0.99985874,\n",
" 'index': 365,\n",
" 'word': '▁clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': '0',\n",
" 'score': 0.9999877,\n",
" 'index': 366,\n",
" 'word': '▁at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': '0',\n",
" 'score': 0.9274058,\n",
" 'index': 367,\n",
" 'word': '▁all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': '0',\n",
" 'score': 0.88927543,\n",
" 'index': 368,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': '0',\n",
" 'score': 0.99990225,\n",
" 'index': 369,\n",
" 'word': '▁which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': '0',\n",
" 'score': 0.9999838,\n",
" 'index': 370,\n",
" 'word': '▁could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': '0',\n",
" 'score': 0.9986493,\n",
" 'index': 371,\n",
" 'word': '▁mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': '0',\n",
" 'score': 0.999985,\n",
" 'index': 372,\n",
" 'word': '▁alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': '0',\n",
" 'score': 0.9999751,\n",
" 'index': 373,\n",
" 'word': '▁mark',\n",
" 'start': 1562,\n",
" 'end': 1566},\n",
" {'entity': '0',\n",
" 'score': 0.99997675,\n",
" 'index': 374,\n",
" 'word': 'ings',\n",
" 'start': 1566,\n",
" 'end': 1570},\n",
" {'entity': '0',\n",
" 'score': 0.9999702,\n",
" 'index': 375,\n",
" 'word': '▁were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': '0',\n",
" 'score': 0.9994505,\n",
" 'index': 376,\n",
" 'word': '▁hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': '0',\n",
" 'score': 0.99997675,\n",
" 'index': 377,\n",
" 'word': '▁by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': '.',\n",
" 'score': 0.8143309,\n",
" 'index': 378,\n",
" 'word': '▁ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': '.',\n",
" 'score': 0.7348569,\n",
" 'index': 379,\n",
" 'word': 'ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': '.',\n",
" 'score': 0.90871817,\n",
" 'index': 380,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': '0',\n",
" 'score': 0.51734453,\n",
" 'index': 381,\n",
" 'word': '▁Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': ',',\n",
" 'score': 0.55640894,\n",
" 'index': 382,\n",
" 'word': '▁no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': '0',\n",
" 'score': 0.90493226,\n",
" 'index': 383,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': ',',\n",
" 'score': 0.7527332,\n",
" 'index': 384,\n",
" 'word': '▁yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': '0',\n",
" 'score': 0.9999099,\n",
" 'index': 385,\n",
" 'word': '▁that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': '0',\n",
" 'score': 0.9999504,\n",
" 'index': 386,\n",
" 'word': '▁rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': '0',\n",
" 'score': 0.9237974,\n",
" 'index': 387,\n",
" 'word': '▁started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': '0',\n",
" 'score': 0.9712035,\n",
" 'index': 388,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': '0',\n",
" 'score': 0.8615394,\n",
" 'index': 389,\n",
" 'word': '▁but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': '0',\n",
" 'score': 0.99998593,\n",
" 'index': 390,\n",
" 'word': '▁to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': '0',\n",
" 'score': 0.99998534,\n",
" 'index': 391,\n",
" 'word': '▁prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': '0',\n",
" 'score': 0.99958175,\n",
" 'index': 392,\n",
" 'word': '▁them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': ',',\n",
" 'score': 0.8349122,\n",
" 'index': 393,\n",
" 'word': '▁wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': '0',\n",
" 'score': 0.9998111,\n",
" 'index': 394,\n",
" 'word': '▁on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': '0',\n",
" 'score': 0.99996305,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': '0',\n",
" 'score': 0.9999033,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': '0',\n",
" 'score': 0.99996483,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': '0',\n",
" 'score': 0.6733225,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': '0',\n",
" 'score': 0.9999819,\n",
" 'index': 399,\n",
" 'word': '▁we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': '0',\n",
" 'score': 0.99993896,\n",
" 'index': 400,\n",
" 'word': '▁decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': '0',\n",
" 'score': 0.9999876,\n",
" 'index': 401,\n",
" 'word': '▁to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': '0',\n",
" 'score': 0.99998784,\n",
" 'index': 402,\n",
" 'word': '▁take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': '0',\n",
" 'score': 0.9999888,\n",
" 'index': 403,\n",
" 'word': '▁another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': '0',\n",
" 'score': 0.88930416,\n",
" 'index': 404,\n",
" 'word': '▁picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': '0',\n",
" 'score': 0.9949751,\n",
" 'index': 405,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': '0',\n",
" 'score': 0.9999832,\n",
" 'index': 406,\n",
" 'word': '▁making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': '0',\n",
" 'score': 0.99980956,\n",
" 'index': 407,\n",
" 'word': '▁sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': '0',\n",
" 'score': 0.9999883,\n",
" 'index': 408,\n",
" 'word': '▁it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': '0',\n",
" 'score': 0.9998956,\n",
" 'index': 409,\n",
" 'word': '▁was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': '0',\n",
" 'score': 0.9999738,\n",
" 'index': 410,\n",
" 'word': '▁a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': '0',\n",
" 'score': 0.9971227,\n",
" 'index': 411,\n",
" 'word': '▁cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': '0',\n",
" 'score': 0.99745196,\n",
" 'index': 412,\n",
" 'word': 'less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': '0',\n",
" 'score': 0.9999789,\n",
" 'index': 413,\n",
" 'word': '▁summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': '0',\n",
" 'score': 0.9822667,\n",
" 'index': 414,\n",
" 'word': '▁day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': '0',\n",
" 'score': 0.983127,\n",
" 'index': 415,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': '0',\n",
" 'score': 0.99998796,\n",
" 'index': 416,\n",
" 'word': '▁Malin',\n",
" 'start': 1745,\n",
" 'end': 1750},\n",
" {'entity': '0',\n",
" 'score': 0.9999881,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': '0',\n",
" 'score': 0.99998975,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': '0',\n",
" 'score': 0.9999672,\n",
" 'index': 419,\n",
" 'word': '▁team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': '0',\n",
" 'score': 0.99998546,\n",
" 'index': 420,\n",
" 'word': '▁capture',\n",
" 'start': 1758,\n",
" 'end': 1765},\n",
" {'entity': '0',\n",
" 'score': 0.99997926,\n",
" 'index': 421,\n",
" 'word': 'd',\n",
" 'start': 1765,\n",
" 'end': 1766},\n",
" {'entity': '0',\n",
" 'score': 0.9999893,\n",
" 'index': 422,\n",
" 'word': '▁an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': '0',\n",
" 'score': 0.9999887,\n",
" 'index': 423,\n",
" 'word': '▁amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': '0',\n",
" 'score': 0.8521725,\n",
" 'index': 424,\n",
" 'word': '▁photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': '0',\n",
" 'score': 0.9999713,\n",
" 'index': 425,\n",
" 'word': '▁using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': '0',\n",
" 'score': 0.99997926,\n",
" 'index': 426,\n",
" 'word': '▁the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': '0',\n",
" 'score': 0.9999058,\n",
" 'index': 427,\n",
" 'word': '▁camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': '0',\n",
" 'score': 0.99995863,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': '0',\n",
" 'score': 0.9999323,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': '0',\n",
" 'score': 0.99984443,\n",
" 'index': 430,\n",
" 'word': '▁absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': '0',\n",
" 'score': 0.9998752,\n",
" 'index': 431,\n",
" 'word': '▁maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': '0',\n",
" 'score': 0.98818076,\n",
" 'index': 432,\n",
" 'word': '▁revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': '0',\n",
" 'score': 0.9950777,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': '0',\n",
" 'score': 0.99994016,\n",
" 'index': 434,\n",
" 'word': '▁With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': '0',\n",
" 'score': 0.99968374,\n",
" 'index': 435,\n",
" 'word': '▁this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': '0',\n",
" 'score': 0.542863,\n",
" 'index': 436,\n",
" 'word': '▁camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': '0',\n",
" 'score': 0.99998796,\n",
" 'index': 437,\n",
" 'word': '▁you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': '0',\n",
" 'score': 0.9999691,\n",
" 'index': 438,\n",
" 'word': '▁can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': '0',\n",
" 'score': 0.99997675,\n",
" 'index': 439,\n",
" 'word': '▁discern',\n",
" 'start': 1857,\n",
" 'end': 1864},\n",
" {'entity': '0',\n",
" 'score': 0.9988336,\n",
" 'index': 440,\n",
" 'word': '▁things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': '0',\n",
" 'score': 0.9999831,\n",
" 'index': 441,\n",
" 'word': '▁in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': '0',\n",
" 'score': 0.9999877,\n",
" 'index': 442,\n",
" 'word': '▁a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': '0',\n",
" 'score': 0.999985,\n",
" 'index': 443,\n",
" 'word': '▁digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': '0',\n",
" 'score': 0.992401,\n",
" 'index': 444,\n",
" 'word': '▁image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': '0',\n",
" 'score': 0.9876018,\n",
" 'index': 445,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': '0',\n",
" 'score': 0.9998029,\n",
" 'index': 446,\n",
" 'word': '▁3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': '0',\n",
" 'score': 0.9999795,\n",
" 'index': 447,\n",
" 'word': '▁times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': '0',\n",
" 'score': 0.99997413,\n",
" 'index': 448,\n",
" 'word': '▁bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': '0',\n",
" 'score': 0.999977,\n",
" 'index': 449,\n",
" 'word': '▁than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': '0',\n",
" 'score': 0.9999838,\n",
" 'index': 450,\n",
" 'word': '▁the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': '0',\n",
" 'score': 0.99995077,\n",
" 'index': 451,\n",
" 'word': '▁pixel',\n",
" 'start': 1916,\n",
" 'end': 1921},\n",
" {'entity': ',',\n",
" 'score': 0.844227,\n",
" 'index': 452,\n",
" 'word': '▁size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': '0',\n",
" 'score': 0.9999434,\n",
" 'index': 453,\n",
" 'word': '▁which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': '0',\n",
" 'score': 0.8158125,\n",
" 'index': 454,\n",
" 'word': '▁means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': '0',\n",
" 'score': 0.9999814,\n",
" 'index': 455,\n",
" 'word': '▁if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': '0',\n",
" 'score': 0.99998975,\n",
" 'index': 456,\n",
" 'word': '▁there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': '0',\n",
" 'score': 0.9999827,\n",
" 'index': 457,\n",
" 'word': '▁were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': '0',\n",
" 'score': 0.9999871,\n",
" 'index': 458,\n",
" 'word': '▁any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': '0',\n",
" 'score': 0.9999901,\n",
" 'index': 459,\n",
" 'word': '▁sign',\n",
" 'start': 1957,\n",
" 'end': 1961},\n",
" {'entity': '0',\n",
" 'score': 0.99998987,\n",
" 'index': 460,\n",
" 'word': 's',\n",
" 'start': 1961,\n",
" 'end': 1962},\n",
" {'entity': '0',\n",
" 'score': 0.99998915,\n",
" 'index': 461,\n",
" 'word': '▁of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': '0',\n",
" 'score': 0.8491108,\n",
" 'index': 462,\n",
" 'word': '▁life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': '0',\n",
" 'score': 0.99971646,\n",
" 'index': 463,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': '0',\n",
" 'score': 0.9999896,\n",
" 'index': 464,\n",
" 'word': '▁you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': '0',\n",
" 'score': 0.9999862,\n",
" 'index': 465,\n",
" 'word': '▁could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': '0',\n",
" 'score': 0.999987,\n",
" 'index': 466,\n",
" 'word': '▁easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': '0',\n",
" 'score': 0.99995744,\n",
" 'index': 467,\n",
" 'word': '▁see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': '0',\n",
" 'score': 0.99999,\n",
" 'index': 468,\n",
" 'word': '▁what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': '0',\n",
" 'score': 0.9999901,\n",
" 'index': 469,\n",
" 'word': '▁they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': '0',\n",
" 'score': 0.96247953,\n",
" 'index': 470,\n",
" 'word': '▁were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': '0',\n",
" 'score': 0.99191755,\n",
" 'index': 471,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': '0',\n",
" 'score': 0.9999583,\n",
" 'index': 472,\n",
" 'word': '▁What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': '0',\n",
" 'score': 0.99999034,\n",
" 'index': 473,\n",
" 'word': '▁the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': '0',\n",
" 'score': 0.99998474,\n",
" 'index': 474,\n",
" 'word': '▁picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': '0',\n",
" 'score': 0.99641967,\n",
" 'index': 475,\n",
" 'word': '▁showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': '0',\n",
" 'score': 0.9996567,\n",
" 'index': 476,\n",
" 'word': '▁was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': '0',\n",
" 'score': 0.99991846,\n",
" 'index': 477,\n",
" 'word': '▁the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': '0',\n",
" 'score': 0.7575091,\n",
" 'index': 478,\n",
" 'word': '▁but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': '0',\n",
" 'score': 0.7503565,\n",
" 'index': 479,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': '0',\n",
" 'score': 0.998519,\n",
" 'index': 480,\n",
" 'word': '▁or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': '0',\n",
" 'score': 0.9107988,\n",
" 'index': 481,\n",
" 'word': '▁mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': '0',\n",
" 'score': 0.82752305,\n",
" 'index': 482,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': '0',\n",
" 'score': 0.99993956,\n",
" 'index': 483,\n",
" 'word': '▁which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': '0',\n",
" 'score': 0.9999516,\n",
" 'index': 484,\n",
" 'word': '▁are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': '0',\n",
" 'score': 0.99993145,\n",
" 'index': 485,\n",
" 'word': '▁land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': '0',\n",
" 'score': 0.9999263,\n",
" 'index': 486,\n",
" 'word': 'form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': '0',\n",
" 'score': 0.99993145,\n",
" 'index': 487,\n",
" 'word': 's',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': '0',\n",
" 'score': 0.99996626,\n",
" 'index': 488,\n",
" 'word': '▁common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': '0',\n",
" 'score': 0.9999869,\n",
" 'index': 489,\n",
" 'word': '▁around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': '0',\n",
" 'score': 0.99999,\n",
" 'index': 490,\n",
" 'word': '▁the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': '0',\n",
" 'score': 0.9999894,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': '0',\n",
" 'score': 0.9730223,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': '0',\n",
" 'score': 0.97686726,\n",
" 'index': 493,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Model 34: name the checkpoint once so the tokenizer and the model cannot drift apart\n",
"# when this cell is copied for another model.\n",
"MODEL_ID = \"mukowaty/punctuate-16\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
"model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n",
"# \"ner\" selects the token-classification pipeline. In the saved output each result is a dict\n",
"# with the keys entity / score / index / word / start / end, and the labels seen are\n",
"# '0', ',' and '.'.\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay assigned near the top of the notebook.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "8c60dc0e-210c-4c34-87f4-9d960dc810fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
", 5\n",
". 5\n",
"0 483\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
", ▁No 1\n",
" ▁no 1\n",
" ▁size 1\n",
" ▁wrong 1\n",
" ▁yes 1\n",
". 1\n",
" ? 1\n",
" ze 1\n",
" ▁1998. 1\n",
" ▁ha 1\n",
"0 6\n",
" , 34\n",
" - 2\n",
" . 11\n",
" ; 1\n",
" But 1\n",
" a 1\n",
" an 1\n",
" bit 1\n",
" cer 1\n",
" cient 1\n",
" craft 1\n",
" ct 1\n",
" d 3\n",
" de 1\n",
" do 1\n",
" ed 1\n",
" er 2\n",
" form 3\n",
" ing 1\n",
" ings 1\n",
" ion 1\n",
" less 1\n",
" ling 1\n",
" mble 1\n",
" med 1\n",
" n 1\n",
" ness 1\n",
" nia 1\n",
" oh 1\n",
" out 1\n",
" pping 1\n",
" re 1\n",
" rra 1\n",
" s 13\n",
" t 2\n",
" te 1\n",
" ted 2\n",
" tion 1\n",
" un 1\n",
" y 2\n",
" ▁ 1\n",
" ▁1 1\n",
" ▁2001 1\n",
" ▁25 1\n",
" ▁3 1\n",
" ▁5 1\n",
" ▁8 1\n",
" ▁American 1\n",
" ▁April 2\n",
" ▁Cy 1\n",
" ▁Egypt 1\n",
" ▁Face 1\n",
" ▁Malin 2\n",
" ▁Mars 7\n",
" ▁Marti 1\n",
" ▁Michael 1\n",
" ▁NASA 3\n",
" ▁On 1\n",
" ▁Or 1\n",
" ▁Phar 1\n",
" ▁So 1\n",
" ▁Some 1\n",
" ▁The 1\n",
" ▁Us 1\n",
" ▁Very 1\n",
" ▁Viking 2\n",
" ▁We 2\n",
" ▁Well 1\n",
" ▁West 1\n",
" ▁What 1\n",
" ▁With 1\n",
" ▁a 10\n",
" ▁able 1\n",
" ▁about 1\n",
" ▁absolute 1\n",
" ▁ago 1\n",
" ▁alien 3\n",
" ▁all 3\n",
" ▁amazing 1\n",
" ▁an 3\n",
" ▁and 8\n",
" ▁announced 1\n",
" ▁another 3\n",
" ▁any 1\n",
" ▁appeared 1\n",
" ▁are 1\n",
" ▁around 2\n",
" ▁at 2\n",
" ▁attention 1\n",
" ▁be 2\n",
" ▁became 1\n",
" ▁because 1\n",
" ▁bigger 1\n",
" ▁books 1\n",
" ▁budget 1\n",
" ▁but 4\n",
" ▁by 3\n",
" ▁camera 3\n",
" ▁can 1\n",
" ▁capture 1\n",
" ▁check 1\n",
" ▁circ 1\n",
" ▁civiliza 1\n",
" ▁clear 1\n",
" ▁cloud 1\n",
" ▁common 2\n",
" ▁correct 1\n",
" ▁could 2\n",
" ▁created 1\n",
" ▁day 1\n",
" ▁days 1\n",
" ▁decided 2\n",
" ▁defender 1\n",
" ▁did 1\n",
" ▁digital 1\n",
" ▁discern 1\n",
" ▁easily 1\n",
" ▁engage 1\n",
" ▁evidence 2\n",
" ▁face 4\n",
" ▁few 1\n",
" ▁figure 1\n",
" ▁finding 1\n",
" ▁five 1\n",
" ▁for 3\n",
" ▁formation 1\n",
" ▁good 1\n",
" ▁gro 1\n",
" ▁ha 1\n",
" ▁had 1\n",
" ▁head 1\n",
" ▁hi 1\n",
" ▁hidden 1\n",
" ▁his 1\n",
" ▁huge 1\n",
" ▁human 2\n",
" ▁icon 1\n",
" ▁if 2\n",
" ▁image 2\n",
" ▁in 3\n",
" ▁is 2\n",
" ▁it 10\n",
" ▁just 3\n",
" ▁land 3\n",
" ▁later 1\n",
" ▁life 3\n",
" ▁like 2\n",
" ▁lines 1\n",
" ▁look 1\n",
" ▁made 2\n",
" ▁magazine 1\n",
" ▁make 1\n",
" ▁making 1\n",
" ▁mark 1\n",
" ▁maximum 1\n",
" ▁me 1\n",
" ▁mean 1\n",
" ▁means 1\n",
" ▁meant 1\n",
" ▁mesa 2\n",
" ▁monument 1\n",
" ▁movies 1\n",
" ▁natural 2\n",
" ▁no 1\n",
" ▁note 1\n",
" ▁obviously 1\n",
" ▁of 5\n",
" ▁on 6\n",
" ▁one 1\n",
" ▁only 2\n",
" ▁or 1\n",
" ▁original 1\n",
" ▁our 1\n",
" ▁out 1\n",
" ▁people 1\n",
" ▁photo 1\n",
" ▁photos 2\n",
" ▁picture 4\n",
" ▁pixel 1\n",
" ▁planet 1\n",
" ▁pop 1\n",
" ▁prove 1\n",
" ▁public 1\n",
" ▁radio 1\n",
" ▁really 1\n",
" ▁rese 1\n",
" ▁reveal 2\n",
" ▁revolution 1\n",
" ▁rock 1\n",
" ▁rumor 1\n",
" ▁scientist 3\n",
" ▁see 2\n",
" ▁shadow 3\n",
" ▁sharp 1\n",
" ▁shot 2\n",
" ▁should 1\n",
" ▁showed 1\n",
" ▁shows 1\n",
" ▁sign 1\n",
" ▁sna 1\n",
" ▁soon 1\n",
" ▁space 1\n",
" ▁spot 1\n",
" ▁started 1\n",
" ▁store 1\n",
" ▁story 1\n",
" ▁summer 1\n",
" ▁sure 3\n",
" ▁take 2\n",
" ▁talk 1\n",
" ▁team 2\n",
" ▁tell 1\n",
" ▁ten 1\n",
" ▁than 2\n",
" ▁that 10\n",
" ▁the 16\n",
" ▁them 1\n",
" ▁there 3\n",
" ▁they 1\n",
" ▁things 1\n",
" ▁this 2\n",
" ▁thought 2\n",
" ▁times 2\n",
" ▁to 10\n",
" ▁took 1\n",
" ▁twenty 1\n",
" ▁us 1\n",
" ▁using 1\n",
" ▁very 1\n",
" ▁wanted 1\n",
" ▁was 10\n",
" ▁wasn 1\n",
" ▁way 1\n",
" ▁we 5\n",
" ▁were 4\n",
" ▁what 1\n",
" ▁when 1\n",
" ▁which 5\n",
" ▁whole 1\n",
" ▁wish 1\n",
" ▁with 1\n",
" ▁would 1\n",
" ▁wrong 1\n",
" ▁years 2\n",
" ▁you 4\n",
"dtype: int64"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"\n",
"# Read the JSON file for model 34 (presumably the predictions saved from the cell above;\n",
"# the code that writes it is not in this cell) and tabulate the predicted labels.\n",
"with open(\"34 mukowatypunctuate-16.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"\n",
"# NOTE(review): `obtener_dataframe` is defined elsewhere in the notebook; the groupby calls\n",
"# below require its result to expose 'entity' and 'word' columns.\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Token count per predicted label (printed, because only the last expression is displayed).\n",
"print(aux.groupby(['entity']).size())\n",
"# Token count per (label, token text) pair, shown as the cell result.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "6e67c25d-82fc-494d-95c6-3812228d0d7d",
"metadata": {},
"source": [
"## 35 HiTZ/mbert-argmining-abstrct-multilingual"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "6e8a7262-9e25-4363-bae7-49d477b96175",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Model 35: name the checkpoint once so the tokenizer and the model cannot drift apart\n",
"# when this cell is copied for another model.\n",
"MODEL_ID = \"HiTZ/mbert-argmining-abstrct-multilingual\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
"model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# NOTE(review): the saved output of this cell is an empty list. The token-classification\n",
"# pipeline drops tokens whose label is listed in `ignore_labels` (default [\"O\"]), so\n",
"# presumably every token was tagged \"O\" -- TODO confirm, e.g. by re-running the pipeline\n",
"# with ignore_labels=[].\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "4a12a382-1d1b-440c-b968-d9b307d1b391",
"metadata": {},
"source": [
"## 36 benjamin/wtp-canine-s-12l"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "462a5e9d-56f2-449e-b80c-28e56d8f4000",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[72], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-12l\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-12l\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "b0794928-e326-4f21-9a11-4d9cfa04a373",
"metadata": {},
"source": [
"## 37 benjamin/wtp-canine-s-3l-no-adapters"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "e57f841c-e383-430c-8afd-a954e4bd2483",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[74], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-3l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-3l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-3l-no-adapters\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-3l-no-adapters\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "72e8fe70-ec5a-43dc-b4a2-aead0b235f75",
"metadata": {},
"source": [
"## 38 benjamin/wtp-canine-s-9l-no-adapters"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "c63de375-015d-4f70-a9f4-bdfa18a6a3cf",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n",
"\u001b[1;31mKeyError\u001b[0m: 'la-canine'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[77], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n",
"\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date."
]
}
],
"source": [
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-9l-no-adapters\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-9l-no-adapters\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "16b75f15-739f-45a4-8211-b094fd2b9104",
"metadata": {},
"source": [
"## 39 msislam/code-mixed-language-detection-XLMRoberta"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "e72aafcc-9c45-47c4-bd61-bf0941e523c0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'I-EN',\n",
" 'score': 0.9999919,\n",
" 'index': 1,\n",
" 'word': '▁So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 3,\n",
" 'word': '▁if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 4,\n",
" 'word': '▁you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 7,\n",
" 'word': '▁a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 11,\n",
" 'word': '▁you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 12,\n",
" 'word': '▁should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 13,\n",
" 'word': '▁be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 14,\n",
" 'word': '▁able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 15,\n",
" 'word': '▁to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 16,\n",
" 'word': '▁tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 17,\n",
" 'word': '▁me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 18,\n",
" 'word': '▁the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 19,\n",
" 'word': '▁whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 20,\n",
" 'word': '▁story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 21,\n",
" 'word': '▁about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 22,\n",
" 'word': '▁the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 27,\n",
" 'word': '▁which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 28,\n",
" 'word': '▁obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999962,\n",
" 'index': 29,\n",
" 'word': '▁is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 30,\n",
" 'word': '▁evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 31,\n",
" 'word': '▁that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 32,\n",
" 'word': '▁there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999607,\n",
" 'index': 33,\n",
" 'word': '▁is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 34,\n",
" 'word': '▁life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 35,\n",
" 'word': '▁on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 36,\n",
" 'word': '▁Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 38,\n",
" 'word': '▁and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 39,\n",
" 'word': '▁that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 40,\n",
" 'word': '▁the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 41,\n",
" 'word': '▁face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 42,\n",
" 'word': '▁was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 43,\n",
" 'word': '▁created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 44,\n",
" 'word': '▁by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 45,\n",
" 'word': '▁alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 46,\n",
" 'word': 's',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999962,\n",
" 'index': 47,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 48,\n",
" 'word': '▁correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999927,\n",
" 'index': 49,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999285,\n",
" 'index': 50,\n",
" 'word': '▁No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999933,\n",
" 'index': 52,\n",
" 'word': '▁twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 53,\n",
" 'word': '▁five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 54,\n",
" 'word': '▁years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 55,\n",
" 'word': '▁ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 57,\n",
" 'word': '▁our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 62,\n",
" 'word': '▁was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 63,\n",
" 'word': '▁circ',\n",
" 'start': 264,\n",
" 'end': 268},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 64,\n",
" 'word': 'ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 65,\n",
" 'word': '▁the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 66,\n",
" 'word': '▁planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 67,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 68,\n",
" 'word': '▁sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 69,\n",
" 'word': 'pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 70,\n",
" 'word': '▁photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 71,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 72,\n",
" 'word': '▁when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 73,\n",
" 'word': '▁it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 74,\n",
" 'word': '▁spot',\n",
" 'start': 310,\n",
" 'end': 314},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 75,\n",
" 'word': 'ted',\n",
" 'start': 314,\n",
" 'end': 317},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 76,\n",
" 'word': '▁the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 77,\n",
" 'word': '▁shadow',\n",
" 'start': 322,\n",
" 'end': 328},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 78,\n",
" 'word': 'y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 79,\n",
" 'word': '▁like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 80,\n",
" 'word': 'ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 81,\n",
" 'word': '▁of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 82,\n",
" 'word': '▁a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 83,\n",
" 'word': '▁human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 84,\n",
" 'word': '▁face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 85,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999607,\n",
" 'index': 86,\n",
" 'word': '▁Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 87,\n",
" 'word': '▁scientist',\n",
" 'start': 359,\n",
" 'end': 368},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999962,\n",
" 'index': 88,\n",
" 'word': 's',\n",
" 'start': 368,\n",
" 'end': 369},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 89,\n",
" 'word': '▁figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 90,\n",
" 'word': 'd',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 91,\n",
" 'word': '▁out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 92,\n",
" 'word': '▁that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 93,\n",
" 'word': '▁it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 94,\n",
" 'word': '▁was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 95,\n",
" 'word': '▁just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 96,\n",
" 'word': '▁another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 99,\n",
" 'word': '▁mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 100,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 101,\n",
" 'word': '▁common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 102,\n",
" 'word': '▁around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 106,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 107,\n",
" 'word': '▁only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 108,\n",
" 'word': '▁this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 109,\n",
" 'word': '▁one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 110,\n",
" 'word': '▁had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 111,\n",
" 'word': '▁shadow',\n",
" 'start': 462,\n",
" 'end': 468},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 112,\n",
" 'word': 's',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 113,\n",
" 'word': '▁that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 114,\n",
" 'word': '▁made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 115,\n",
" 'word': '▁it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 116,\n",
" 'word': '▁look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 117,\n",
" 'word': '▁like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999962,\n",
" 'index': 118,\n",
" 'word': '▁an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999963,\n",
" 'index': 124,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 125,\n",
" 'word': '▁Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 126,\n",
" 'word': '▁few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 127,\n",
" 'word': '▁days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 128,\n",
" 'word': '▁later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 129,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999607,\n",
" 'index': 130,\n",
" 'word': '▁we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 131,\n",
" 'word': '▁reveal',\n",
" 'start': 538,\n",
" 'end': 544},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 132,\n",
" 'word': 'ed',\n",
" 'start': 544,\n",
" 'end': 546},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 133,\n",
" 'word': '▁the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 134,\n",
" 'word': '▁image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 135,\n",
" 'word': '▁for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 136,\n",
" 'word': '▁all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 137,\n",
" 'word': '▁to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 138,\n",
" 'word': '▁see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 139,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 140,\n",
" 'word': '▁and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 141,\n",
" 'word': '▁we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 142,\n",
" 'word': '▁made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 143,\n",
" 'word': '▁sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 144,\n",
" 'word': '▁to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 145,\n",
" 'word': '▁note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 146,\n",
" 'word': '▁that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 147,\n",
" 'word': '▁it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 148,\n",
" 'word': '▁was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 149,\n",
" 'word': '▁a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 150,\n",
" 'word': '▁huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 151,\n",
" 'word': '▁rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 152,\n",
" 'word': '▁formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 153,\n",
" 'word': '▁that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 154,\n",
" 'word': '▁just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 155,\n",
" 'word': '▁rese',\n",
" 'start': 642,\n",
" 'end': 646},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 156,\n",
" 'word': 'mble',\n",
" 'start': 646,\n",
" 'end': 650},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 157,\n",
" 'word': 'd',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 158,\n",
" 'word': '▁a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 159,\n",
" 'word': '▁human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 160,\n",
" 'word': '▁head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 161,\n",
" 'word': '▁and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 162,\n",
" 'word': '▁face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999932,\n",
" 'index': 163,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 164,\n",
" 'word': '▁but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 165,\n",
" 'word': '▁all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 166,\n",
" 'word': '▁of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 167,\n",
" 'word': '▁it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 168,\n",
" 'word': '▁was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 169,\n",
" 'word': '▁for',\n",
" 'start': 693,\n",
" 'end': 696},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 170,\n",
" 'word': 'med',\n",
" 'start': 696,\n",
" 'end': 699},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 171,\n",
" 'word': '▁by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 172,\n",
" 'word': '▁shadow',\n",
" 'start': 703,\n",
" 'end': 709},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 173,\n",
" 'word': 's',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 174,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 175,\n",
" 'word': '▁We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 176,\n",
" 'word': '▁only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999963,\n",
" 'index': 177,\n",
" 'word': '▁announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 178,\n",
" 'word': '▁it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 179,\n",
" 'word': '▁because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 180,\n",
" 'word': '▁we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 181,\n",
" 'word': '▁thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 182,\n",
" 'word': '▁it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 183,\n",
" 'word': '▁would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 184,\n",
" 'word': '▁be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 185,\n",
" 'word': '▁a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 186,\n",
" 'word': '▁good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 187,\n",
" 'word': '▁way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 188,\n",
" 'word': '▁to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 189,\n",
" 'word': '▁engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 190,\n",
" 'word': '▁the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 191,\n",
" 'word': '▁public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 192,\n",
" 'word': '▁with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 194,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 195,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 196,\n",
" 'word': '▁finding',\n",
" 'start': 808,\n",
" 'end': 815},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 197,\n",
" 'word': 's',\n",
" 'start': 815,\n",
" 'end': 816},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999962,\n",
" 'index': 199,\n",
" 'word': '▁and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 200,\n",
" 'word': '▁at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 201,\n",
" 'word': 'rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 202,\n",
" 'word': 'ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 203,\n",
" 'word': '▁attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999595,\n",
" 'index': 204,\n",
" 'word': '▁to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999607,\n",
" 'index': 205,\n",
" 'word': '▁Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999964,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999963,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999963,\n",
" 'index': 208,\n",
" 'word': '▁and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 209,\n",
" 'word': '▁it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 210,\n",
" 'word': '▁did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999913,\n",
" 'index': 212,\n",
" 'word': '▁The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 213,\n",
" 'word': '▁face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 214,\n",
" 'word': '▁on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 215,\n",
" 'word': '▁Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 216,\n",
" 'word': '▁soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 217,\n",
" 'word': '▁became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 218,\n",
" 'word': '▁a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 219,\n",
" 'word': '▁pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 220,\n",
" 'word': '▁icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 221,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 222,\n",
" 'word': '▁shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 223,\n",
" 'word': '▁in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 224,\n",
" 'word': '▁movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999927,\n",
" 'index': 225,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 226,\n",
" 'word': '▁appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 227,\n",
" 'word': '▁in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 228,\n",
" 'word': '▁books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999908,\n",
" 'index': 229,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999344,\n",
" 'index': 230,\n",
" 'word': '▁magazine',\n",
" 'start': 939,\n",
" 'end': 947},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999933,\n",
" 'index': 231,\n",
" 'word': 's',\n",
" 'start': 947,\n",
" 'end': 948},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999908,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 233,\n",
" 'word': '▁radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 234,\n",
" 'word': '▁talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 235,\n",
" 'word': '▁shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999987,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 237,\n",
" 'word': '▁and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 238,\n",
" 'word': '▁ha',\n",
" 'start': 972,\n",
" 'end': 974},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 239,\n",
" 'word': 'un',\n",
" 'start': 974,\n",
" 'end': 976},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 240,\n",
" 'word': 'ted',\n",
" 'start': 976,\n",
" 'end': 979},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 241,\n",
" 'word': '▁gro',\n",
" 'start': 980,\n",
" 'end': 983},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 242,\n",
" 'word': 'cer',\n",
" 'start': 983,\n",
" 'end': 986},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999285,\n",
" 'index': 243,\n",
" 'word': 'y',\n",
" 'start': 986,\n",
" 'end': 987},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 244,\n",
" 'word': '▁store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 245,\n",
" 'word': '▁check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999932,\n",
" 'index': 246,\n",
" 'word': 'out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 247,\n",
" 'word': '▁lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 248,\n",
" 'word': '▁for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 249,\n",
" 'word': '▁25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 250,\n",
" 'word': '▁years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 251,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 252,\n",
" 'word': '▁Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 253,\n",
" 'word': '▁people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 254,\n",
" 'word': '▁thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999344,\n",
" 'index': 255,\n",
" 'word': '▁the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 256,\n",
" 'word': '▁natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 257,\n",
" 'word': '▁land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 258,\n",
" 'word': 'form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 259,\n",
" 'word': '▁was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 260,\n",
" 'word': '▁evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 261,\n",
" 'word': '▁of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 262,\n",
" 'word': '▁life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 263,\n",
" 'word': '▁on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999933,\n",
" 'index': 265,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 266,\n",
" 'word': '▁and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 267,\n",
" 'word': '▁that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 268,\n",
" 'word': '▁us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 269,\n",
" 'word': '▁scientist',\n",
" 'start': 1106,\n",
" 'end': 1115},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 270,\n",
" 'word': 's',\n",
" 'start': 1115,\n",
" 'end': 1116},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 271,\n",
" 'word': '▁wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 272,\n",
" 'word': '▁to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 273,\n",
" 'word': '▁hi',\n",
" 'start': 1127,\n",
" 'end': 1129},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 274,\n",
" 'word': 'de',\n",
" 'start': 1129,\n",
" 'end': 1131},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 275,\n",
" 'word': '▁it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 277,\n",
" 'word': '▁but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 278,\n",
" 'word': '▁really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 279,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999932,\n",
" 'index': 280,\n",
" 'word': '▁the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 281,\n",
" 'word': '▁defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999297,\n",
" 'index': 282,\n",
" 'word': 's',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999285,\n",
" 'index': 283,\n",
" 'word': '▁of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999912,\n",
" 'index': 284,\n",
" 'word': '▁the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 286,\n",
" 'word': '▁budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 287,\n",
" 'word': '▁wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 288,\n",
" 'word': '▁there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999907,\n",
" 'index': 289,\n",
" 'word': '▁was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999213,\n",
" 'index': 290,\n",
" 'word': '▁an',\n",
" 'start': 1196,\n",
" 'end': 1198},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 291,\n",
" 'word': 'cient',\n",
" 'start': 1198,\n",
" 'end': 1203},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 292,\n",
" 'word': '▁civiliza',\n",
" 'start': 1204,\n",
" 'end': 1212},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999924,\n",
" 'index': 293,\n",
" 'word': 'tion',\n",
" 'start': 1212,\n",
" 'end': 1216},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 294,\n",
" 'word': '▁on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 295,\n",
" 'word': '▁Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 296,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 297,\n",
" 'word': '▁We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 298,\n",
" 'word': '▁decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999924,\n",
" 'index': 299,\n",
" 'word': '▁to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 300,\n",
" 'word': '▁take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 301,\n",
" 'word': '▁another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 302,\n",
" 'word': '▁shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999344,\n",
" 'index': 303,\n",
" 'word': '▁just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 304,\n",
" 'word': '▁to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 305,\n",
" 'word': '▁make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 306,\n",
" 'word': '▁sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 307,\n",
" 'word': '▁we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 308,\n",
" 'word': '▁were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 309,\n",
" 'word': 'n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 310,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 311,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999297,\n",
" 'index': 312,\n",
" 'word': '▁wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 314,\n",
" 'word': '▁on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999285,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999875,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999225,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 321,\n",
" 'word': '▁and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 322,\n",
" 'word': '▁his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999926,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 327,\n",
" 'word': '▁camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 328,\n",
" 'word': '▁team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 329,\n",
" 'word': '▁took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 330,\n",
" 'word': '▁a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 331,\n",
" 'word': '▁picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 332,\n",
" 'word': '▁that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 333,\n",
" 'word': '▁was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 334,\n",
" 'word': '▁ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 335,\n",
" 'word': '▁times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 336,\n",
" 'word': '▁sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 337,\n",
" 'word': 'er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 338,\n",
" 'word': '▁than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 339,\n",
" 'word': '▁the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 340,\n",
" 'word': '▁original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 341,\n",
" 'word': '▁Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 342,\n",
" 'word': '▁photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999285,\n",
" 'index': 343,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 344,\n",
" 'word': '▁reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 345,\n",
" 'word': 'ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 346,\n",
" 'word': '▁a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 347,\n",
" 'word': '▁natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 348,\n",
" 'word': '▁land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 349,\n",
" 'word': 'form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999925,\n",
" 'index': 350,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 351,\n",
" 'word': '▁which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 352,\n",
" 'word': '▁meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 353,\n",
" 'word': '▁no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 354,\n",
" 'word': '▁alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 355,\n",
" 'word': '▁monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 356,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999344,\n",
" 'index': 357,\n",
" 'word': '▁\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 358,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 359,\n",
" 'word': '▁that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 360,\n",
" 'word': '▁picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 361,\n",
" 'word': '▁wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 362,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 363,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 364,\n",
" 'word': '▁very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 365,\n",
" 'word': '▁clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 366,\n",
" 'word': '▁at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 367,\n",
" 'word': '▁all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 368,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 369,\n",
" 'word': '▁which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 370,\n",
" 'word': '▁could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 371,\n",
" 'word': '▁mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 372,\n",
" 'word': '▁alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 373,\n",
" 'word': '▁mark',\n",
" 'start': 1562,\n",
" 'end': 1566},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 374,\n",
" 'word': 'ings',\n",
" 'start': 1566,\n",
" 'end': 1570},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 375,\n",
" 'word': '▁were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 376,\n",
" 'word': '▁hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 377,\n",
" 'word': '▁by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 378,\n",
" 'word': '▁ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 379,\n",
" 'word': 'ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 380,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 381,\n",
" 'word': '▁Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 382,\n",
" 'word': '▁no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999958,\n",
" 'index': 383,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 384,\n",
" 'word': '▁yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 385,\n",
" 'word': '▁that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 386,\n",
" 'word': '▁rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 387,\n",
" 'word': '▁started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 388,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 389,\n",
" 'word': '▁but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 390,\n",
" 'word': '▁to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 391,\n",
" 'word': '▁prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 392,\n",
" 'word': '▁them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 393,\n",
" 'word': '▁wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 394,\n",
" 'word': '▁on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 399,\n",
" 'word': '▁we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 400,\n",
" 'word': '▁decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 401,\n",
" 'word': '▁to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 402,\n",
" 'word': '▁take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 403,\n",
" 'word': '▁another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999535,\n",
" 'index': 404,\n",
" 'word': '▁picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 405,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 406,\n",
" 'word': '▁making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 407,\n",
" 'word': '▁sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 408,\n",
" 'word': '▁it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 409,\n",
" 'word': '▁was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 410,\n",
" 'word': '▁a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 411,\n",
" 'word': '▁cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 412,\n",
" 'word': 'less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 413,\n",
" 'word': '▁summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 414,\n",
" 'word': '▁day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 415,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999913,\n",
" 'index': 416,\n",
" 'word': '▁Malin',\n",
" 'start': 1745,\n",
" 'end': 1750},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 419,\n",
" 'word': '▁team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 420,\n",
" 'word': '▁capture',\n",
" 'start': 1758,\n",
" 'end': 1765},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 421,\n",
" 'word': 'd',\n",
" 'start': 1765,\n",
" 'end': 1766},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 422,\n",
" 'word': '▁an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 423,\n",
" 'word': '▁amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 424,\n",
" 'word': '▁photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 425,\n",
" 'word': '▁using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 426,\n",
" 'word': '▁the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 427,\n",
" 'word': '▁camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 430,\n",
" 'word': '▁absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 431,\n",
" 'word': '▁maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 432,\n",
" 'word': '▁revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 434,\n",
" 'word': '▁With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 435,\n",
" 'word': '▁this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 436,\n",
" 'word': '▁camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 437,\n",
" 'word': '▁you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 438,\n",
" 'word': '▁can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999933,\n",
" 'index': 439,\n",
" 'word': '▁discern',\n",
" 'start': 1857,\n",
" 'end': 1864},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 440,\n",
" 'word': '▁things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 441,\n",
" 'word': '▁in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 442,\n",
" 'word': '▁a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 443,\n",
" 'word': '▁digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 444,\n",
" 'word': '▁image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 445,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 446,\n",
" 'word': '▁3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999416,\n",
" 'index': 447,\n",
" 'word': '▁times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 448,\n",
" 'word': '▁bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 449,\n",
" 'word': '▁than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999356,\n",
" 'index': 450,\n",
" 'word': '▁the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999992,\n",
" 'index': 451,\n",
" 'word': '▁pixel',\n",
" 'start': 1916,\n",
" 'end': 1921},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999927,\n",
" 'index': 452,\n",
" 'word': '▁size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 453,\n",
" 'word': '▁which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 454,\n",
" 'word': '▁means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 455,\n",
" 'word': '▁if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999992,\n",
" 'index': 456,\n",
" 'word': '▁there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999931,\n",
" 'index': 457,\n",
" 'word': '▁were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999938,\n",
" 'index': 458,\n",
" 'word': '▁any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999927,\n",
" 'index': 459,\n",
" 'word': '▁sign',\n",
" 'start': 1957,\n",
" 'end': 1961},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999992,\n",
" 'index': 460,\n",
" 'word': 's',\n",
" 'start': 1961,\n",
" 'end': 1962},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999926,\n",
" 'index': 461,\n",
" 'word': '▁of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999404,\n",
" 'index': 462,\n",
" 'word': '▁life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999106,\n",
" 'index': 463,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999943,\n",
" 'index': 464,\n",
" 'word': '▁you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 465,\n",
" 'word': '▁could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 466,\n",
" 'word': '▁easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999939,\n",
" 'index': 467,\n",
" 'word': '▁see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 468,\n",
" 'word': '▁what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999944,\n",
" 'index': 469,\n",
" 'word': '▁they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 470,\n",
" 'word': '▁were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 471,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 472,\n",
" 'word': '▁What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 473,\n",
" 'word': '▁the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999475,\n",
" 'index': 474,\n",
" 'word': '▁picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 475,\n",
" 'word': '▁showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 476,\n",
" 'word': '▁was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 477,\n",
" 'word': '▁the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 478,\n",
" 'word': '▁but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 479,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 480,\n",
" 'word': '▁or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999937,\n",
" 'index': 481,\n",
" 'word': '▁mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999949,\n",
" 'index': 482,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999957,\n",
" 'index': 483,\n",
" 'word': '▁which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999607,\n",
" 'index': 484,\n",
" 'word': '▁are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 485,\n",
" 'word': '▁land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.999995,\n",
" 'index': 486,\n",
" 'word': 'form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999547,\n",
" 'index': 487,\n",
" 'word': 's',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999945,\n",
" 'index': 488,\n",
" 'word': '▁common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999464,\n",
" 'index': 489,\n",
" 'word': '▁around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 490,\n",
" 'word': '▁the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999956,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.99999523,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'I-EN',\n",
" 'score': 0.9999951,\n",
" 'index': 493,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Hugging Face Hub id of the checkpoint, written once so the tokenizer and the\n",
"# model below cannot end up being loaded from two different repositories.\n",
"checkpoint = \"msislam/code-mixed-language-detection-XLMRoberta\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n",
"\n",
"# Token-classification pipeline (task alias 'ner'). No aggregation strategy is\n",
"# passed, so the result holds one dict per tagged sub-word token.\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay defined near the top of the notebook.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 131,
"id": "f6603b67-95f1-4da1-9cff-da6f138c57fa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"I-EN 493\n",
"dtype: int64\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" score | \n",
"
\n",
" \n",
" entity | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" I-EN | \n",
" 493 | \n",
" 0.999995 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity score\n",
"entity \n",
"I-EN 493 0.999995"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Load the JSON file kept for model 39 (presumably the saved output of the\n",
"# tagging cell above - confirm) and convert it with the notebook helper\n",
"# `obtener_dataframe`, which is defined outside this cell.\n",
"with open(\"39 msislamcode-mixed-language-detection-XLMRoberta.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Number of tokens that received each entity tag.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Token count and mean score per entity tag. Being the last expression of the\n",
"# cell, this table is what gets rendered as the cell output.\n",
"aux.groupby(['entity']).agg({'entity': 'size', 'score': 'mean'})"
]
},
{
"cell_type": "markdown",
"id": "2c588a1c-faa2-4955-bc83-9bdf2bdb3692",
"metadata": {},
"source": [
"## 40 DunnBC22/bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "21949283-c47a-4c93-8965-98ce4b11d09a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.98604935,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.67684585,\n",
" 'index': 23,\n",
" 'word': 'Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.6898482,\n",
" 'index': 24,\n",
" 'word': 'On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.72657937,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.8695399,\n",
" 'index': 37,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.87388104,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.9686202,\n",
" 'index': 61,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.6234842,\n",
" 'index': 98,\n",
" 'word': 'Mart',\n",
" 'start': 407,\n",
" 'end': 411},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.7816169,\n",
" 'index': 99,\n",
" 'word': '##ian',\n",
" 'start': 411,\n",
" 'end': 414},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.91626126,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.8611923,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.80380625,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.4532804,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.64347696,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.48230314,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.4377054,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.49611342,\n",
" 'index': 125,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9510295,\n",
" 'index': 194,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.8271464,\n",
" 'index': 205,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.8054947,\n",
" 'index': 215,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.865675,\n",
" 'index': 263,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.98267186,\n",
" 'index': 282,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.70751286,\n",
" 'index': 291,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.99715245,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9968267,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.99570113,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.83701926,\n",
" 'index': 321,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.8589732,\n",
" 'index': 322,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.91832304,\n",
" 'index': 323,\n",
" 'word': '##biter',\n",
" 'start': 1341,\n",
" 'end': 1346},\n",
" {'entity': 'I-MISC',\n",
" 'score': 0.86819035,\n",
" 'index': 338,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9447502,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.95286584,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6390979,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.63282007,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Hugging Face Hub id of the checkpoint, written once so the tokenizer and the\n",
"# model below cannot end up being loaded from two different repositories.\n",
"checkpoint = \"DunnBC22/bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n",
"\n",
"# Token-classification pipeline (task alias 'ner'). No aggregation strategy is\n",
"# passed, so word pieces such as '##yd' are reported as separate entries.\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay defined near the top of the notebook.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "1cbc0df4-5b76-4aef-9ca4-915b70274014",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 8\n",
"B-ORG 3\n",
"B-PER 2\n",
"I-LOC 3\n",
"I-MISC 15\n",
"I-PER 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC American 1\n",
" C 1\n",
" Egypt 1\n",
" Mars 5\n",
"B-ORG NASA 3\n",
"B-PER Mali 1\n",
" Michael 1\n",
"I-LOC ##onia 1\n",
" ##yd 1\n",
" West 1\n",
"I-MISC ##ara 1\n",
" ##biter 1\n",
" ##ian 1\n",
" ##ion 1\n",
" ##oh 1\n",
" 1 1\n",
" Face 1\n",
" Mars 2\n",
" Mart 1\n",
" On 1\n",
" Or 1\n",
" Ph 1\n",
" Viking 2\n",
"I-PER ##n 2\n",
" Mali 1\n",
"dtype: int64"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Load the JSON file kept for model 40 (presumably the saved output of the\n",
"# tagging cell above - confirm) and convert it with the notebook helper\n",
"# `obtener_dataframe`, which is defined outside this cell.\n",
"with open(\"40 DunnBC22bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Number of tokens that received each entity tag.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Occurrences of every (entity tag, token) pair. Being the last expression of\n",
"# the cell, this Series is what gets rendered as the cell output.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "fd1d4edf-44f2-4d33-aaef-74e2351995ae",
"metadata": {},
"source": [
"## 41 rollerhafeezh-amikom/xlm-roberta-base-ner-silvanus "
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "31b2cf11-39a1-43cf-9679-76eaef9b5428",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-LOC',\n",
" 'score': 0.9711031,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.97973377,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.98118365,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'B-DAT',\n",
" 'score': 0.99709177,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9621371,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9620826,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.98092973,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': 'B-DAT',\n",
" 'score': 0.9965664,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9623601,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9603569,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9859262,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9732382,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.6832874,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Hugging Face Hub id of the checkpoint, written once so the tokenizer and the\n",
"# model below cannot end up being loaded from two different repositories.\n",
"checkpoint = \"rollerhafeezh-amikom/xlm-roberta-base-ner-silvanus\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n",
"\n",
"# Token-classification pipeline (task alias 'ner'). No aggregation strategy is\n",
"# passed, so the result holds one dict per tagged sub-word token.\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay defined near the top of the notebook.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "e7cc7649-0234-4fb3-9802-e5909f30a3b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-DAT 2\n",
"B-LOC 2\n",
"I-DAT 6\n",
"I-LOC 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-DAT ▁April 2\n",
"B-LOC ▁American 1\n",
" ▁Cy 1\n",
"I-DAT , 2\n",
" ▁1998. 1\n",
" ▁2001 1\n",
" ▁5 1\n",
" ▁8 1\n",
"I-LOC do 1\n",
" nia 1\n",
" ▁West 1\n",
"dtype: int64"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the JSON file kept for model 41 (presumably the saved output of the\n",
"# tagging cell above - confirm) and convert it with the notebook helper\n",
"# `obtener_dataframe`, which is defined outside this cell.\n",
"with open(\"41 rollerhafeezh-amikomxlm-roberta-base-ner-silvanus.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Number of tokens that received each entity tag.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Occurrences of every (entity tag, token) pair. Being the last expression of\n",
"# the cell, this Series is what gets rendered as the cell output.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "dff1d357-0650-4946-8703-0fe86143eb17",
"metadata": {},
"source": [
"## 42 orgcatorg/distilbert-base-multilingual-cased-ner"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "b5293a8a-c29f-4090-9db2-b7f92b4842f5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'B-LOC',\n",
" 'score': 0.9980641,\n",
" 'index': 104,\n",
" 'word': 'C',\n",
" 'start': 435,\n",
" 'end': 436},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.99705327,\n",
" 'index': 105,\n",
" 'word': '##yd',\n",
" 'start': 436,\n",
" 'end': 438},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9987865,\n",
" 'index': 106,\n",
" 'word': '##onia',\n",
" 'start': 438,\n",
" 'end': 442},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.66370535,\n",
" 'index': 121,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.6953692,\n",
" 'index': 122,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.6620473,\n",
" 'index': 123,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.7284523,\n",
" 'index': 124,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.704528,\n",
" 'index': 125,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9904669,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9859772,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.9845413,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6711369,\n",
" 'index': 492,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.49512243,\n",
" 'index': 493,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Tokenizer and token-classification weights come from the same checkpoint.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/distilbert-base-multilingual-cased-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/distilbert-base-multilingual-cased-ner\")\n",
"\n",
"# Build the NER pipeline and run it on `text`, the essay defined near the top of the notebook.\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"ner_results = nlp(text)\n",
"\n",
"# One dict per tagged token: entity, score, index, word, start, end.\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "63543f53-7959-4cae-9090-926897ead7f5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 4\n",
"B-PER 3\n",
"I-LOC 1\n",
"I-PER 5\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC ##onia 1\n",
" ##yd 1\n",
" American 1\n",
" C 1\n",
"B-PER ##ion 1\n",
" Egypt 1\n",
" Michael 1\n",
"I-LOC West 1\n",
"I-PER ##ara 1\n",
" ##n 1\n",
" ##oh 1\n",
" Mali 1\n",
" Ph 1\n",
"dtype: int64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the JSON file named after model 42 (presumably its saved NER output -- TODO confirm where it is written).\n",
"with open(\"42 orgcatorgdistilbert-base-multilingual-cased-ner.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"\n",
"# obtener_dataframe is defined elsewhere in this notebook; the result is grouped by its 'entity' and 'word' columns below.\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Row count per entity label, printed as plain text.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Row count per (entity label, token) pair; as the last expression it becomes the cell result.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "846da5e7-bd48-4d24-9c48-4da51f22fadb",
"metadata": {},
"source": [
"## 43 orgcatorg/xlm-v-base-ner"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "94d18491-a366-4197-b0c5-de86e41ee576",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'B-ORG',\n",
" 'score': 0.8529484,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 15,\n",
" 'end': 20},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.67219996,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 87,\n",
" 'end': 92},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.9593816,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 92,\n",
" 'end': 95},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.98047435,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 95,\n",
" 'end': 100},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.97809476,\n",
" 'index': 57,\n",
" 'word': '▁Viking',\n",
" 'start': 239,\n",
" 'end': 246},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.97545755,\n",
" 'index': 58,\n",
" 'word': '▁1',\n",
" 'start': 246,\n",
" 'end': 248},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.5685065,\n",
" 'index': 59,\n",
" 'word': '▁space',\n",
" 'start': 248,\n",
" 'end': 254},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.7317715,\n",
" 'index': 93,\n",
" 'word': '▁Marti',\n",
" 'start': 406,\n",
" 'end': 412},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.77504426,\n",
" 'index': 94,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.73297995,\n",
" 'index': 99,\n",
" 'word': '▁Cy',\n",
" 'start': 434,\n",
" 'end': 437},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9430252,\n",
" 'index': 113,\n",
" 'word': '▁Egypti',\n",
" 'start': 495,\n",
" 'end': 502},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.9459621,\n",
" 'index': 114,\n",
" 'word': 'on',\n",
" 'start': 502,\n",
" 'end': 504},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.98246944,\n",
" 'index': 115,\n",
" 'word': '▁Pharao',\n",
" 'start': 504,\n",
" 'end': 511},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9633557,\n",
" 'index': 116,\n",
" 'word': 'h',\n",
" 'start': 511,\n",
" 'end': 512},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.9796036,\n",
" 'index': 294,\n",
" 'word': '▁Michael',\n",
" 'start': 1311,\n",
" 'end': 1319},\n",
" {'entity': 'I-PER',\n",
" 'score': 0.98091847,\n",
" 'index': 295,\n",
" 'word': '▁Malin',\n",
" 'start': 1319,\n",
" 'end': 1325},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.9358236,\n",
" 'index': 298,\n",
" 'word': '▁Mars',\n",
" 'start': 1333,\n",
" 'end': 1338},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.90482205,\n",
" 'index': 299,\n",
" 'word': '▁Orbit',\n",
" 'start': 1338,\n",
" 'end': 1344},\n",
" {'entity': 'I-ORG',\n",
" 'score': 0.70797783,\n",
" 'index': 300,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.73849326,\n",
" 'index': 448,\n",
" 'word': '▁but',\n",
" 'start': 2040,\n",
" 'end': 2044},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.6988277,\n",
" 'index': 449,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.5565135,\n",
" 'index': 460,\n",
" 'word': '▁American',\n",
" 'start': 2093,\n",
" 'end': 2102}]"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Tokenizer and token-classification weights come from the same checkpoint.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/xlm-v-base-ner\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/xlm-v-base-ner\")\n",
"\n",
"# Build the NER pipeline and run it on `text`, the essay defined near the top of the notebook.\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"ner_results = nlp(text)\n",
"\n",
"# One dict per tagged token: entity, score, index, word, start, end.\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "da937121-ff84-4ee6-9cdc-a624047999ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-LOC 8\n",
"B-ORG 4\n",
"B-PER 1\n",
"I-LOC 2\n",
"I-ORG 6\n",
"I-PER 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-LOC an 1\n",
" on 1\n",
" te 1\n",
" ▁American 1\n",
" ▁Cy 1\n",
" ▁Egypti 1\n",
" ▁Marti 1\n",
" ▁but 1\n",
"B-ORG ▁Face 1\n",
" ▁Mars 1\n",
" ▁NASA 1\n",
" ▁Viking 1\n",
"B-PER ▁Michael 1\n",
"I-LOC h 1\n",
" ▁Pharao 1\n",
"I-ORG er 1\n",
" ▁1 1\n",
" ▁Mars 1\n",
" ▁On 1\n",
" ▁Orbit 1\n",
" ▁space 1\n",
"I-PER ▁Malin 1\n",
"dtype: int64"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the JSON file named after model 43 (presumably its saved NER output -- TODO confirm where it is written).\n",
"with open(\"43 orgcatorgxlm-v-base-ner.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"\n",
"# obtener_dataframe is defined elsewhere in this notebook; the result is grouped by its 'entity' and 'word' columns below.\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Row count per entity label, printed as plain text.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Row count per (entity label, token) pair; as the last expression it becomes the cell result.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "46e2733d-93b9-447b-91d4-efae62f4d2b6",
"metadata": {},
"source": [
"## 44 dejanseo/LinkBERT-XL"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "7c2c315e-cf4c-453e-afcf-816049cfc16a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'LABEL_0',\n",
" 'score': 0.99891233,\n",
" 'index': 1,\n",
" 'word': '▁So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99892116,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979603,\n",
" 'index': 3,\n",
" 'word': '▁if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99819654,\n",
" 'index': 4,\n",
" 'word': '▁you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984597,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99839216,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984572,\n",
" 'index': 7,\n",
" 'word': '▁a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9959991,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99812514,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979564,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98711306,\n",
" 'index': 11,\n",
" 'word': '▁you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9878263,\n",
" 'index': 12,\n",
" 'word': '▁should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98597556,\n",
" 'index': 13,\n",
" 'word': '▁be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9839212,\n",
" 'index': 14,\n",
" 'word': '▁able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9830526,\n",
" 'index': 15,\n",
" 'word': '▁to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9660563,\n",
" 'index': 16,\n",
" 'word': '▁tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9594903,\n",
" 'index': 17,\n",
" 'word': '▁me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9053577,\n",
" 'index': 18,\n",
" 'word': '▁the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.84808695,\n",
" 'index': 19,\n",
" 'word': '▁whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.87426686,\n",
" 'index': 20,\n",
" 'word': '▁story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9940403,\n",
" 'index': 21,\n",
" 'word': '▁about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9932741,\n",
" 'index': 22,\n",
" 'word': '▁the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9718788,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98829424,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9920151,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99279845,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976579,\n",
" 'index': 27,\n",
" 'word': '▁which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982932,\n",
" 'index': 28,\n",
" 'word': '▁obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99760216,\n",
" 'index': 29,\n",
" 'word': '▁is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9973246,\n",
" 'index': 30,\n",
" 'word': '▁evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99918765,\n",
" 'index': 31,\n",
" 'word': '▁that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99898845,\n",
" 'index': 32,\n",
" 'word': '▁there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990656,\n",
" 'index': 33,\n",
" 'word': '▁is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96821344,\n",
" 'index': 34,\n",
" 'word': '▁life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991037,\n",
" 'index': 35,\n",
" 'word': '▁on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991229,\n",
" 'index': 36,\n",
" 'word': '▁Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988174,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993888,\n",
" 'index': 38,\n",
" 'word': '▁and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99929714,\n",
" 'index': 39,\n",
" 'word': '▁that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986438,\n",
" 'index': 40,\n",
" 'word': '▁the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99628323,\n",
" 'index': 41,\n",
" 'word': '▁face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99901855,\n",
" 'index': 42,\n",
" 'word': '▁was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981996,\n",
" 'index': 43,\n",
" 'word': '▁created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987431,\n",
" 'index': 44,\n",
" 'word': '▁by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988205,\n",
" 'index': 45,\n",
" 'word': '▁alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99885654,\n",
" 'index': 46,\n",
" 'word': 's',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992561,\n",
" 'index': 47,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99904126,\n",
" 'index': 48,\n",
" 'word': '▁correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9989654,\n",
" 'index': 49,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99859565,\n",
" 'index': 50,\n",
" 'word': '▁No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99823654,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979513,\n",
" 'index': 52,\n",
" 'word': '▁twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984366,\n",
" 'index': 53,\n",
" 'word': '▁five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99829,\n",
" 'index': 54,\n",
" 'word': '▁years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984238,\n",
" 'index': 55,\n",
" 'word': '▁ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984384,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98689705,\n",
" 'index': 57,\n",
" 'word': '▁our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8483806,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8561226,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96529603,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9623912,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976211,\n",
" 'index': 62,\n",
" 'word': '▁was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99899536,\n",
" 'index': 63,\n",
" 'word': '▁circ',\n",
" 'start': 264,\n",
" 'end': 268},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99855524,\n",
" 'index': 64,\n",
" 'word': 'ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99938333,\n",
" 'index': 65,\n",
" 'word': '▁the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993606,\n",
" 'index': 66,\n",
" 'word': '▁planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99926203,\n",
" 'index': 67,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99685955,\n",
" 'index': 68,\n",
" 'word': '▁sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99678254,\n",
" 'index': 69,\n",
" 'word': 'pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9975854,\n",
" 'index': 70,\n",
" 'word': '▁photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9972172,\n",
" 'index': 71,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9966794,\n",
" 'index': 72,\n",
" 'word': '▁when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99142355,\n",
" 'index': 73,\n",
" 'word': '▁it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9804128,\n",
" 'index': 74,\n",
" 'word': '▁spot',\n",
" 'start': 310,\n",
" 'end': 314},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97927594,\n",
" 'index': 75,\n",
" 'word': 'ted',\n",
" 'start': 314,\n",
" 'end': 317},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9854144,\n",
" 'index': 76,\n",
" 'word': '▁the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98293096,\n",
" 'index': 77,\n",
" 'word': '▁shadow',\n",
" 'start': 322,\n",
" 'end': 328},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9837083,\n",
" 'index': 78,\n",
" 'word': 'y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9853166,\n",
" 'index': 79,\n",
" 'word': '▁like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9849769,\n",
" 'index': 80,\n",
" 'word': 'ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99710613,\n",
" 'index': 81,\n",
" 'word': '▁of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99454665,\n",
" 'index': 82,\n",
" 'word': '▁a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9947602,\n",
" 'index': 83,\n",
" 'word': '▁human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9950589,\n",
" 'index': 84,\n",
" 'word': '▁face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99469876,\n",
" 'index': 85,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98264503,\n",
" 'index': 86,\n",
" 'word': '▁Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9652982,\n",
" 'index': 87,\n",
" 'word': '▁scientist',\n",
" 'start': 359,\n",
" 'end': 368},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9688392,\n",
" 'index': 88,\n",
" 'word': 's',\n",
" 'start': 368,\n",
" 'end': 369},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.93723047,\n",
" 'index': 89,\n",
" 'word': '▁figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9343609,\n",
" 'index': 90,\n",
" 'word': 'd',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9389645,\n",
" 'index': 91,\n",
" 'word': '▁out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976173,\n",
" 'index': 92,\n",
" 'word': '▁that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985746,\n",
" 'index': 93,\n",
" 'word': '▁it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990897,\n",
" 'index': 94,\n",
" 'word': '▁was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986003,\n",
" 'index': 95,\n",
" 'word': '▁just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99798214,\n",
" 'index': 96,\n",
" 'word': '▁another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982463,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976306,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99826413,\n",
" 'index': 99,\n",
" 'word': '▁mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986713,\n",
" 'index': 100,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990752,\n",
" 'index': 101,\n",
" 'word': '▁common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991449,\n",
" 'index': 102,\n",
" 'word': '▁around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987953,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988625,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99890935,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983901,\n",
" 'index': 106,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98953724,\n",
" 'index': 107,\n",
" 'word': '▁only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97681797,\n",
" 'index': 108,\n",
" 'word': '▁this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9744693,\n",
" 'index': 109,\n",
" 'word': '▁one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98362905,\n",
" 'index': 110,\n",
" 'word': '▁had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9713618,\n",
" 'index': 111,\n",
" 'word': '▁shadow',\n",
" 'start': 462,\n",
" 'end': 468},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9737309,\n",
" 'index': 112,\n",
" 'word': 's',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9887904,\n",
" 'index': 113,\n",
" 'word': '▁that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9857012,\n",
" 'index': 114,\n",
" 'word': '▁made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98496616,\n",
" 'index': 115,\n",
" 'word': '▁it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9844059,\n",
" 'index': 116,\n",
" 'word': '▁look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98615867,\n",
" 'index': 117,\n",
" 'word': '▁like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98830485,\n",
" 'index': 118,\n",
" 'word': '▁an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9811522,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9806471,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9806395,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9844712,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9841697,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9892211,\n",
" 'index': 124,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986187,\n",
" 'index': 125,\n",
" 'word': '▁Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99793774,\n",
" 'index': 126,\n",
" 'word': '▁few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99664587,\n",
" 'index': 127,\n",
" 'word': '▁days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99783033,\n",
" 'index': 128,\n",
" 'word': '▁later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982863,\n",
" 'index': 129,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.94166446,\n",
" 'index': 130,\n",
" 'word': '▁we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.85345703,\n",
" 'index': 131,\n",
" 'word': '▁reveal',\n",
" 'start': 538,\n",
" 'end': 544},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8374294,\n",
" 'index': 132,\n",
" 'word': 'ed',\n",
" 'start': 544,\n",
" 'end': 546},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8623249,\n",
" 'index': 133,\n",
" 'word': '▁the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8417424,\n",
" 'index': 134,\n",
" 'word': '▁image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98973304,\n",
" 'index': 135,\n",
" 'word': '▁for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9759,\n",
" 'index': 136,\n",
" 'word': '▁all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99520445,\n",
" 'index': 137,\n",
" 'word': '▁to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99369144,\n",
" 'index': 138,\n",
" 'word': '▁see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99321544,\n",
" 'index': 139,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991154,\n",
" 'index': 140,\n",
" 'word': '▁and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9958181,\n",
" 'index': 141,\n",
" 'word': '▁we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99414885,\n",
" 'index': 142,\n",
" 'word': '▁made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9957287,\n",
" 'index': 143,\n",
" 'word': '▁sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9959401,\n",
" 'index': 144,\n",
" 'word': '▁to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9936752,\n",
" 'index': 145,\n",
" 'word': '▁note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.998691,\n",
" 'index': 146,\n",
" 'word': '▁that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9956762,\n",
" 'index': 147,\n",
" 'word': '▁it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99795747,\n",
" 'index': 148,\n",
" 'word': '▁was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9948554,\n",
" 'index': 149,\n",
" 'word': '▁a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.992023,\n",
" 'index': 150,\n",
" 'word': '▁huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97803485,\n",
" 'index': 151,\n",
" 'word': '▁rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9896576,\n",
" 'index': 152,\n",
" 'word': '▁formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999468,\n",
" 'index': 153,\n",
" 'word': '▁that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987307,\n",
" 'index': 154,\n",
" 'word': '▁just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9978719,\n",
" 'index': 155,\n",
" 'word': '▁rese',\n",
" 'start': 642,\n",
" 'end': 646},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99729806,\n",
" 'index': 156,\n",
" 'word': 'mble',\n",
" 'start': 646,\n",
" 'end': 650},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9971686,\n",
" 'index': 157,\n",
" 'word': 'd',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99888545,\n",
" 'index': 158,\n",
" 'word': '▁a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983923,\n",
" 'index': 159,\n",
" 'word': '▁human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9969125,\n",
" 'index': 160,\n",
" 'word': '▁head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99962246,\n",
" 'index': 161,\n",
" 'word': '▁and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99936336,\n",
" 'index': 162,\n",
" 'word': '▁face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99953985,\n",
" 'index': 163,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995252,\n",
" 'index': 164,\n",
" 'word': '▁but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984201,\n",
" 'index': 165,\n",
" 'word': '▁all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990183,\n",
" 'index': 166,\n",
" 'word': '▁of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9980836,\n",
" 'index': 167,\n",
" 'word': '▁it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99912924,\n",
" 'index': 168,\n",
" 'word': '▁was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986325,\n",
" 'index': 169,\n",
" 'word': '▁for',\n",
" 'start': 693,\n",
" 'end': 696},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99902165,\n",
" 'index': 170,\n",
" 'word': 'med',\n",
" 'start': 696,\n",
" 'end': 699},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99891686,\n",
" 'index': 171,\n",
" 'word': '▁by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992654,\n",
" 'index': 172,\n",
" 'word': '▁shadow',\n",
" 'start': 703,\n",
" 'end': 709},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99873906,\n",
" 'index': 173,\n",
" 'word': 's',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986755,\n",
" 'index': 174,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9940135,\n",
" 'index': 175,\n",
" 'word': '▁We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9937384,\n",
" 'index': 176,\n",
" 'word': '▁only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98741966,\n",
" 'index': 177,\n",
" 'word': '▁announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9905053,\n",
" 'index': 178,\n",
" 'word': '▁it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99872977,\n",
" 'index': 179,\n",
" 'word': '▁because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99781847,\n",
" 'index': 180,\n",
" 'word': '▁we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99807006,\n",
" 'index': 181,\n",
" 'word': '▁thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99812585,\n",
" 'index': 182,\n",
" 'word': '▁it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987987,\n",
" 'index': 183,\n",
" 'word': '▁would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99814045,\n",
" 'index': 184,\n",
" 'word': '▁be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9968272,\n",
" 'index': 185,\n",
" 'word': '▁a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9960741,\n",
" 'index': 186,\n",
" 'word': '▁good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9928383,\n",
" 'index': 187,\n",
" 'word': '▁way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99515605,\n",
" 'index': 188,\n",
" 'word': '▁to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9906125,\n",
" 'index': 189,\n",
" 'word': '▁engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9925929,\n",
" 'index': 190,\n",
" 'word': '▁the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99023867,\n",
" 'index': 191,\n",
" 'word': '▁public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99890065,\n",
" 'index': 192,\n",
" 'word': '▁with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9960693,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986105,\n",
" 'index': 194,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988933,\n",
" 'index': 195,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991429,\n",
" 'index': 196,\n",
" 'word': '▁finding',\n",
" 'start': 808,\n",
" 'end': 815},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99926883,\n",
" 'index': 197,\n",
" 'word': 's',\n",
" 'start': 815,\n",
" 'end': 816},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99931073,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99953973,\n",
" 'index': 199,\n",
" 'word': '▁and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99747664,\n",
" 'index': 200,\n",
" 'word': '▁at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9958331,\n",
" 'index': 201,\n",
" 'word': 'rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99711335,\n",
" 'index': 202,\n",
" 'word': 'ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99565244,\n",
" 'index': 203,\n",
" 'word': '▁attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99948764,\n",
" 'index': 204,\n",
" 'word': '▁to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992853,\n",
" 'index': 205,\n",
" 'word': '▁Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996164,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99963045,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995757,\n",
" 'index': 208,\n",
" 'word': '▁and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984617,\n",
" 'index': 209,\n",
" 'word': '▁it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9978162,\n",
" 'index': 210,\n",
" 'word': '▁did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99782467,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9902649,\n",
" 'index': 212,\n",
" 'word': '▁The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9821962,\n",
" 'index': 213,\n",
" 'word': '▁face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98453516,\n",
" 'index': 214,\n",
" 'word': '▁on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98241407,\n",
" 'index': 215,\n",
" 'word': '▁Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981958,\n",
" 'index': 216,\n",
" 'word': '▁soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987692,\n",
" 'index': 217,\n",
" 'word': '▁became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987563,\n",
" 'index': 218,\n",
" 'word': '▁a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99791545,\n",
" 'index': 219,\n",
" 'word': '▁pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983084,\n",
" 'index': 220,\n",
" 'word': '▁icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985397,\n",
" 'index': 221,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.986997,\n",
" 'index': 222,\n",
" 'word': '▁shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99500024,\n",
" 'index': 223,\n",
" 'word': '▁in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9940572,\n",
" 'index': 224,\n",
" 'word': '▁movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9948042,\n",
" 'index': 225,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9830314,\n",
" 'index': 226,\n",
" 'word': '▁appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99576855,\n",
" 'index': 227,\n",
" 'word': '▁in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9962681,\n",
" 'index': 228,\n",
" 'word': '▁books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.997297,\n",
" 'index': 229,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9969189,\n",
" 'index': 230,\n",
" 'word': '▁magazine',\n",
" 'start': 939,\n",
" 'end': 947},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9965844,\n",
" 'index': 231,\n",
" 'word': 's',\n",
" 'start': 947,\n",
" 'end': 948},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9970284,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9969007,\n",
" 'index': 233,\n",
" 'word': '▁radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99603075,\n",
" 'index': 234,\n",
" 'word': '▁talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99737513,\n",
" 'index': 235,\n",
" 'word': '▁shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9977088,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976942,\n",
" 'index': 237,\n",
" 'word': '▁and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9749291,\n",
" 'index': 238,\n",
" 'word': '▁ha',\n",
" 'start': 972,\n",
" 'end': 974},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9755974,\n",
" 'index': 239,\n",
" 'word': 'un',\n",
" 'start': 974,\n",
" 'end': 976},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97456145,\n",
" 'index': 240,\n",
" 'word': 'ted',\n",
" 'start': 976,\n",
" 'end': 979},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97627914,\n",
" 'index': 241,\n",
" 'word': '▁gro',\n",
" 'start': 980,\n",
" 'end': 983},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9766211,\n",
" 'index': 242,\n",
" 'word': 'cer',\n",
" 'start': 983,\n",
" 'end': 986},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9755186,\n",
" 'index': 243,\n",
" 'word': 'y',\n",
" 'start': 986,\n",
" 'end': 987},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97471875,\n",
" 'index': 244,\n",
" 'word': '▁store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97767866,\n",
" 'index': 245,\n",
" 'word': '▁check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97759926,\n",
" 'index': 246,\n",
" 'word': 'out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98035,\n",
" 'index': 247,\n",
" 'word': '▁lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99741083,\n",
" 'index': 248,\n",
" 'word': '▁for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99673516,\n",
" 'index': 249,\n",
" 'word': '▁25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99663204,\n",
" 'index': 250,\n",
" 'word': '▁years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9973544,\n",
" 'index': 251,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9906943,\n",
" 'index': 252,\n",
" 'word': '▁Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9870733,\n",
" 'index': 253,\n",
" 'word': '▁people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98770535,\n",
" 'index': 254,\n",
" 'word': '▁thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98852706,\n",
" 'index': 255,\n",
" 'word': '▁the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97648376,\n",
" 'index': 256,\n",
" 'word': '▁natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98154485,\n",
" 'index': 257,\n",
" 'word': '▁land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9710462,\n",
" 'index': 258,\n",
" 'word': 'form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9930674,\n",
" 'index': 259,\n",
" 'word': '▁was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9904585,\n",
" 'index': 260,\n",
" 'word': '▁evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99835026,\n",
" 'index': 261,\n",
" 'word': '▁of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96821344,\n",
" 'index': 262,\n",
" 'word': '▁life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988362,\n",
" 'index': 263,\n",
" 'word': '▁on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99900466,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994442,\n",
" 'index': 265,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99967086,\n",
" 'index': 266,\n",
" 'word': '▁and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990483,\n",
" 'index': 267,\n",
" 'word': '▁that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99356836,\n",
" 'index': 268,\n",
" 'word': '▁us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9960284,\n",
" 'index': 269,\n",
" 'word': '▁scientist',\n",
" 'start': 1106,\n",
" 'end': 1115},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9963697,\n",
" 'index': 270,\n",
" 'word': 's',\n",
" 'start': 1115,\n",
" 'end': 1116},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983851,\n",
" 'index': 271,\n",
" 'word': '▁wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986203,\n",
" 'index': 272,\n",
" 'word': '▁to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983407,\n",
" 'index': 273,\n",
" 'word': '▁hi',\n",
" 'start': 1127,\n",
" 'end': 1129},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99833196,\n",
" 'index': 274,\n",
" 'word': 'de',\n",
" 'start': 1129,\n",
" 'end': 1131},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99933356,\n",
" 'index': 275,\n",
" 'word': '▁it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99929845,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99945015,\n",
" 'index': 277,\n",
" 'word': '▁but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999064,\n",
" 'index': 278,\n",
" 'word': '▁really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.998408,\n",
" 'index': 279,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9786945,\n",
" 'index': 280,\n",
" 'word': '▁the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9525546,\n",
" 'index': 281,\n",
" 'word': '▁defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9473736,\n",
" 'index': 282,\n",
" 'word': 's',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9641373,\n",
" 'index': 283,\n",
" 'word': '▁of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9595204,\n",
" 'index': 284,\n",
" 'word': '▁the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9465081,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.956375,\n",
" 'index': 286,\n",
" 'word': '▁budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9938366,\n",
" 'index': 287,\n",
" 'word': '▁wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99663156,\n",
" 'index': 288,\n",
" 'word': '▁there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9960407,\n",
" 'index': 289,\n",
" 'word': '▁was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9943784,\n",
" 'index': 290,\n",
" 'word': '▁an',\n",
" 'start': 1196,\n",
" 'end': 1198},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9929704,\n",
" 'index': 291,\n",
" 'word': 'cient',\n",
" 'start': 1198,\n",
" 'end': 1203},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9938042,\n",
" 'index': 292,\n",
" 'word': '▁civiliza',\n",
" 'start': 1204,\n",
" 'end': 1212},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99334246,\n",
" 'index': 293,\n",
" 'word': 'tion',\n",
" 'start': 1212,\n",
" 'end': 1216},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981047,\n",
" 'index': 294,\n",
" 'word': '▁on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99802125,\n",
" 'index': 295,\n",
" 'word': '▁Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9974251,\n",
" 'index': 296,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99758637,\n",
" 'index': 297,\n",
" 'word': '▁We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99532855,\n",
" 'index': 298,\n",
" 'word': '▁decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9968822,\n",
" 'index': 299,\n",
" 'word': '▁to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99047685,\n",
" 'index': 300,\n",
" 'word': '▁take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98163795,\n",
" 'index': 301,\n",
" 'word': '▁another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.981338,\n",
" 'index': 302,\n",
" 'word': '▁shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976725,\n",
" 'index': 303,\n",
" 'word': '▁just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9989598,\n",
" 'index': 304,\n",
" 'word': '▁to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99915683,\n",
" 'index': 305,\n",
" 'word': '▁make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990343,\n",
" 'index': 306,\n",
" 'word': '▁sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994629,\n",
" 'index': 307,\n",
" 'word': '▁we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99965143,\n",
" 'index': 308,\n",
" 'word': '▁were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996381,\n",
" 'index': 309,\n",
" 'word': 'n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996587,\n",
" 'index': 310,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99964845,\n",
" 'index': 311,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994722,\n",
" 'index': 312,\n",
" 'word': '▁wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99955314,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99804926,\n",
" 'index': 314,\n",
" 'word': '▁on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9966838,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99728274,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99812526,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99731195,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6198232,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6221296,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.95554626,\n",
" 'index': 321,\n",
" 'word': '▁and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.93194216,\n",
" 'index': 322,\n",
" 'word': '▁his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7339511,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7226213,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7723435,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.76182514,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.87562317,\n",
" 'index': 327,\n",
" 'word': '▁camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.89518005,\n",
" 'index': 328,\n",
" 'word': '▁team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9218035,\n",
" 'index': 329,\n",
" 'word': '▁took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9225395,\n",
" 'index': 330,\n",
" 'word': '▁a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.916193,\n",
" 'index': 331,\n",
" 'word': '▁picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9926691,\n",
" 'index': 332,\n",
" 'word': '▁that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9904732,\n",
" 'index': 333,\n",
" 'word': '▁was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98267406,\n",
" 'index': 334,\n",
" 'word': '▁ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9849161,\n",
" 'index': 335,\n",
" 'word': '▁times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9801893,\n",
" 'index': 336,\n",
" 'word': '▁sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98280776,\n",
" 'index': 337,\n",
" 'word': 'er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99816483,\n",
" 'index': 338,\n",
" 'word': '▁than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99864274,\n",
" 'index': 339,\n",
" 'word': '▁the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981275,\n",
" 'index': 340,\n",
" 'word': '▁original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9918675,\n",
" 'index': 341,\n",
" 'word': '▁Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99843293,\n",
" 'index': 342,\n",
" 'word': '▁photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99834,\n",
" 'index': 343,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99184215,\n",
" 'index': 344,\n",
" 'word': '▁reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99084723,\n",
" 'index': 345,\n",
" 'word': 'ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979195,\n",
" 'index': 346,\n",
" 'word': '▁a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99878365,\n",
" 'index': 347,\n",
" 'word': '▁natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988181,\n",
" 'index': 348,\n",
" 'word': '▁land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9989188,\n",
" 'index': 349,\n",
" 'word': 'form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987502,\n",
" 'index': 350,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99949133,\n",
" 'index': 351,\n",
" 'word': '▁which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99865425,\n",
" 'index': 352,\n",
" 'word': '▁meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9977582,\n",
" 'index': 353,\n",
" 'word': '▁no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99797624,\n",
" 'index': 354,\n",
" 'word': '▁alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.998558,\n",
" 'index': 355,\n",
" 'word': '▁monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985544,\n",
" 'index': 356,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991504,\n",
" 'index': 357,\n",
" 'word': '▁\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99959415,\n",
" 'index': 358,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99847144,\n",
" 'index': 359,\n",
" 'word': '▁that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9965953,\n",
" 'index': 360,\n",
" 'word': '▁picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9989673,\n",
" 'index': 361,\n",
" 'word': '▁wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999243,\n",
" 'index': 362,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99926895,\n",
" 'index': 363,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985392,\n",
" 'index': 364,\n",
" 'word': '▁very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9978017,\n",
" 'index': 365,\n",
" 'word': '▁clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99971443,\n",
" 'index': 366,\n",
" 'word': '▁at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99972814,\n",
" 'index': 367,\n",
" 'word': '▁all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99969304,\n",
" 'index': 368,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99946886,\n",
" 'index': 369,\n",
" 'word': '▁which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99923897,\n",
" 'index': 370,\n",
" 'word': '▁could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993623,\n",
" 'index': 371,\n",
" 'word': '▁mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99697757,\n",
" 'index': 372,\n",
" 'word': '▁alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9969267,\n",
" 'index': 373,\n",
" 'word': '▁mark',\n",
" 'start': 1562,\n",
" 'end': 1566},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99740356,\n",
" 'index': 374,\n",
" 'word': 'ings',\n",
" 'start': 1566,\n",
" 'end': 1570},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990921,\n",
" 'index': 375,\n",
" 'word': '▁were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988734,\n",
" 'index': 376,\n",
" 'word': '▁hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993931,\n",
" 'index': 377,\n",
" 'word': '▁by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993395,\n",
" 'index': 378,\n",
" 'word': '▁ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994405,\n",
" 'index': 379,\n",
" 'word': 'ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996549,\n",
" 'index': 380,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997403,\n",
" 'index': 381,\n",
" 'word': '▁Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997552,\n",
" 'index': 382,\n",
" 'word': '▁no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997204,\n",
" 'index': 383,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994294,\n",
" 'index': 384,\n",
" 'word': '▁yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9963684,\n",
" 'index': 385,\n",
" 'word': '▁that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9943778,\n",
" 'index': 386,\n",
" 'word': '▁rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99859875,\n",
" 'index': 387,\n",
" 'word': '▁started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99874413,\n",
" 'index': 388,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991598,\n",
" 'index': 389,\n",
" 'word': '▁but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9980592,\n",
" 'index': 390,\n",
" 'word': '▁to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99275404,\n",
" 'index': 391,\n",
" 'word': '▁prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9936196,\n",
" 'index': 392,\n",
" 'word': '▁them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9883024,\n",
" 'index': 393,\n",
" 'word': '▁wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99685585,\n",
" 'index': 394,\n",
" 'word': '▁on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9905654,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9897703,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9914705,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.989052,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988476,\n",
" 'index': 399,\n",
" 'word': '▁we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976255,\n",
" 'index': 400,\n",
" 'word': '▁decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990896,\n",
" 'index': 401,\n",
" 'word': '▁to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982212,\n",
" 'index': 402,\n",
" 'word': '▁take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987219,\n",
" 'index': 403,\n",
" 'word': '▁another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987041,\n",
" 'index': 404,\n",
" 'word': '▁picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99892056,\n",
" 'index': 405,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990138,\n",
" 'index': 406,\n",
" 'word': '▁making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987526,\n",
" 'index': 407,\n",
" 'word': '▁sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99886996,\n",
" 'index': 408,\n",
" 'word': '▁it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99928397,\n",
" 'index': 409,\n",
" 'word': '▁was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986174,\n",
" 'index': 410,\n",
" 'word': '▁a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9980627,\n",
" 'index': 411,\n",
" 'word': '▁cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979323,\n",
" 'index': 412,\n",
" 'word': 'less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99851483,\n",
" 'index': 413,\n",
" 'word': '▁summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99867994,\n",
" 'index': 414,\n",
" 'word': '▁day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986368,\n",
" 'index': 415,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9535012,\n",
" 'index': 416,\n",
" 'word': '▁Malin',\n",
" 'start': 1745,\n",
" 'end': 1750},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.95024145,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9507696,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9469109,\n",
" 'index': 419,\n",
" 'word': '▁team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8001874,\n",
" 'index': 420,\n",
" 'word': '▁capture',\n",
" 'start': 1758,\n",
" 'end': 1765},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7871362,\n",
" 'index': 421,\n",
" 'word': 'd',\n",
" 'start': 1765,\n",
" 'end': 1766},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7254455,\n",
" 'index': 422,\n",
" 'word': '▁an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6367166,\n",
" 'index': 423,\n",
" 'word': '▁amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6151613,\n",
" 'index': 424,\n",
" 'word': '▁photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9940475,\n",
" 'index': 425,\n",
" 'word': '▁using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99607027,\n",
" 'index': 426,\n",
" 'word': '▁the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9946696,\n",
" 'index': 427,\n",
" 'word': '▁camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9957001,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99648213,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9934956,\n",
" 'index': 430,\n",
" 'word': '▁absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9946866,\n",
" 'index': 431,\n",
" 'word': '▁maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99592364,\n",
" 'index': 432,\n",
" 'word': '▁revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99613345,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986493,\n",
" 'index': 434,\n",
" 'word': '▁With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98783743,\n",
" 'index': 435,\n",
" 'word': '▁this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98299176,\n",
" 'index': 436,\n",
" 'word': '▁camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9948515,\n",
" 'index': 437,\n",
" 'word': '▁you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99211496,\n",
" 'index': 438,\n",
" 'word': '▁can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98975813,\n",
" 'index': 439,\n",
" 'word': '▁discern',\n",
" 'start': 1857,\n",
" 'end': 1864},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99309033,\n",
" 'index': 440,\n",
" 'word': '▁things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99935037,\n",
" 'index': 441,\n",
" 'word': '▁in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99924976,\n",
" 'index': 442,\n",
" 'word': '▁a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993017,\n",
" 'index': 443,\n",
" 'word': '▁digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99953175,\n",
" 'index': 444,\n",
" 'word': '▁image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992694,\n",
" 'index': 445,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9953614,\n",
" 'index': 446,\n",
" 'word': '▁3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99575055,\n",
" 'index': 447,\n",
" 'word': '▁times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9946045,\n",
" 'index': 448,\n",
" 'word': '▁bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99767905,\n",
" 'index': 449,\n",
" 'word': '▁than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985954,\n",
" 'index': 450,\n",
" 'word': '▁the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99617577,\n",
" 'index': 451,\n",
" 'word': '▁pixel',\n",
" 'start': 1916,\n",
" 'end': 1921},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9971623,\n",
" 'index': 452,\n",
" 'word': '▁size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999595,\n",
" 'index': 453,\n",
" 'word': '▁which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99959654,\n",
" 'index': 454,\n",
" 'word': '▁means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997098,\n",
" 'index': 455,\n",
" 'word': '▁if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997428,\n",
" 'index': 456,\n",
" 'word': '▁there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99971575,\n",
" 'index': 457,\n",
" 'word': '▁were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997061,\n",
" 'index': 458,\n",
" 'word': '▁any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994355,\n",
" 'index': 459,\n",
" 'word': '▁sign',\n",
" 'start': 1957,\n",
" 'end': 1961},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995265,\n",
" 'index': 460,\n",
" 'word': 's',\n",
" 'start': 1961,\n",
" 'end': 1962},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99968946,\n",
" 'index': 461,\n",
" 'word': '▁of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96821344,\n",
" 'index': 462,\n",
" 'word': '▁life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99971575,\n",
" 'index': 463,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99914134,\n",
" 'index': 464,\n",
" 'word': '▁you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99893373,\n",
" 'index': 465,\n",
" 'word': '▁could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99874496,\n",
" 'index': 466,\n",
" 'word': '▁easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99866235,\n",
" 'index': 467,\n",
" 'word': '▁see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99949706,\n",
" 'index': 468,\n",
" 'word': '▁what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990375,\n",
" 'index': 469,\n",
" 'word': '▁they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99939954,\n",
" 'index': 470,\n",
" 'word': '▁were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991054,\n",
" 'index': 471,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985911,\n",
" 'index': 472,\n",
" 'word': '▁What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99561054,\n",
" 'index': 473,\n",
" 'word': '▁the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99237365,\n",
" 'index': 474,\n",
" 'word': '▁picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99209166,\n",
" 'index': 475,\n",
" 'word': '▁showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99855715,\n",
" 'index': 476,\n",
" 'word': '▁was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982992,\n",
" 'index': 477,\n",
" 'word': '▁the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99486816,\n",
" 'index': 478,\n",
" 'word': '▁but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9939652,\n",
" 'index': 479,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994578,\n",
" 'index': 480,\n",
" 'word': '▁or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99937266,\n",
" 'index': 481,\n",
" 'word': '▁mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994253,\n",
" 'index': 482,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99890506,\n",
" 'index': 483,\n",
" 'word': '▁which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99758005,\n",
" 'index': 484,\n",
" 'word': '▁are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9942561,\n",
" 'index': 485,\n",
" 'word': '▁land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9909231,\n",
" 'index': 486,\n",
" 'word': 'form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9923442,\n",
" 'index': 487,\n",
" 'word': 's',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9924804,\n",
" 'index': 488,\n",
" 'word': '▁common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.998198,\n",
" 'index': 489,\n",
" 'word': '▁around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9988335,\n",
" 'index': 490,\n",
" 'word': '▁the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99893695,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991405,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99888045,\n",
" 'index': 493,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Load the tokenizer and the token-classification model for dejanseo/LinkBERT-XL.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"dejanseo/LinkBERT-XL\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"dejanseo/LinkBERT-XL\")\n",
"\n",
"# Wrap the two loaded objects in a `ner` pipeline.\n",
"nlp = pipeline(task=\"ner\", tokenizer=tokenizer, model=model)\n",
"\n",
"# `text` is the essay assigned near the top of the notebook. The result is a list with\n",
"# one dict per reported token, carrying: entity, score, index, word, start, end.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "96937981-4d0d-404c-ae4c-aefe77910390",
"metadata": {},
"source": [
"## 45 HiTZ/mbert-argmining-abstrct-en-es"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "8e292687-4b7c-40ab-9add-32f7190694c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Load the tokenizer and the token-classification model for HiTZ/mbert-argmining-abstrct-en-es.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"HiTZ/mbert-argmining-abstrct-en-es\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"HiTZ/mbert-argmining-abstrct-en-es\")\n",
"\n",
"# Wrap the two loaded objects in a `ner` pipeline.\n",
"nlp = pipeline(task=\"ner\", tokenizer=tokenizer, model=model)\n",
"\n",
"# `text` is the essay assigned near the top of the notebook.\n",
"# NOTE(review): the saved run of this cell returned an empty list; presumably every token\n",
"# received a label the pipeline leaves out of its results - confirm before drawing conclusions.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "801bcc2a-74f5-4168-bd46-81080ded913e",
"metadata": {},
"source": [
"## 46 HiTZ/mdeberta-expl-extraction-multi "
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "0b463753-3823-4b84-b214-530b4b2d82db",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\transformers\\convert_slow_tokenizer.py:560: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n",
" warnings.warn(\n",
"Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at HiTZ/mdeberta-expl-extraction-multi and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'LABEL_1',\n",
" 'score': 0.69964296,\n",
" 'index': 1,\n",
" 'word': '▁So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68438363,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76636976,\n",
" 'index': 3,\n",
" 'word': '▁if',\n",
" 'start': 3,\n",
" 'end': 6},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7428422,\n",
" 'index': 4,\n",
" 'word': '▁you',\n",
" 'start': 6,\n",
" 'end': 10},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72554,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73893636,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73846024,\n",
" 'index': 7,\n",
" 'word': '▁',\n",
" 'start': 13,\n",
" 'end': 14},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7346474,\n",
" 'index': 8,\n",
" 'word': 'a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71560675,\n",
" 'index': 9,\n",
" 'word': '▁NASA',\n",
" 'start': 15,\n",
" 'end': 20},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7557671,\n",
" 'index': 10,\n",
" 'word': '▁',\n",
" 'start': 20,\n",
" 'end': 21},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7091787,\n",
" 'index': 11,\n",
" 'word': 'scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68782264,\n",
" 'index': 12,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73133516,\n",
" 'index': 13,\n",
" 'word': '▁you',\n",
" 'start': 31,\n",
" 'end': 35},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7256754,\n",
" 'index': 14,\n",
" 'word': '▁should',\n",
" 'start': 35,\n",
" 'end': 42},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7170361,\n",
" 'index': 15,\n",
" 'word': '▁be',\n",
" 'start': 42,\n",
" 'end': 45},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7384796,\n",
" 'index': 16,\n",
" 'word': '▁',\n",
" 'start': 45,\n",
" 'end': 46},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7560725,\n",
" 'index': 17,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7414307,\n",
" 'index': 18,\n",
" 'word': '▁to',\n",
" 'start': 50,\n",
" 'end': 53},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7181038,\n",
" 'index': 19,\n",
" 'word': '▁tell',\n",
" 'start': 53,\n",
" 'end': 58},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7363644,\n",
" 'index': 20,\n",
" 'word': '▁me',\n",
" 'start': 58,\n",
" 'end': 61},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7599349,\n",
" 'index': 21,\n",
" 'word': '▁the',\n",
" 'start': 61,\n",
" 'end': 65},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74890417,\n",
" 'index': 22,\n",
" 'word': '▁whole',\n",
" 'start': 65,\n",
" 'end': 71},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7119105,\n",
" 'index': 23,\n",
" 'word': '▁story',\n",
" 'start': 71,\n",
" 'end': 77},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7301424,\n",
" 'index': 24,\n",
" 'word': '▁about',\n",
" 'start': 77,\n",
" 'end': 83},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7458922,\n",
" 'index': 25,\n",
" 'word': '▁the',\n",
" 'start': 83,\n",
" 'end': 87},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7478779,\n",
" 'index': 26,\n",
" 'word': '▁Face',\n",
" 'start': 87,\n",
" 'end': 92},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72260505,\n",
" 'index': 27,\n",
" 'word': '▁On',\n",
" 'start': 92,\n",
" 'end': 95},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.678619,\n",
" 'index': 28,\n",
" 'word': '▁Mars',\n",
" 'start': 95,\n",
" 'end': 100},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7015995,\n",
" 'index': 29,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76157975,\n",
" 'index': 30,\n",
" 'word': '▁',\n",
" 'start': 101,\n",
" 'end': 102},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7142693,\n",
" 'index': 31,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7759696,\n",
" 'index': 32,\n",
" 'word': '▁ob',\n",
" 'start': 107,\n",
" 'end': 110},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76725924,\n",
" 'index': 33,\n",
" 'word': 'viously',\n",
" 'start': 110,\n",
" 'end': 117},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7179181,\n",
" 'index': 34,\n",
" 'word': '▁is',\n",
" 'start': 117,\n",
" 'end': 120},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72448343,\n",
" 'index': 35,\n",
" 'word': '▁',\n",
" 'start': 120,\n",
" 'end': 121},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6992092,\n",
" 'index': 36,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71810836,\n",
" 'index': 37,\n",
" 'word': '▁that',\n",
" 'start': 129,\n",
" 'end': 134},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7591603,\n",
" 'index': 38,\n",
" 'word': '▁there',\n",
" 'start': 134,\n",
" 'end': 140},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7224874,\n",
" 'index': 39,\n",
" 'word': '▁is',\n",
" 'start': 140,\n",
" 'end': 143},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7104872,\n",
" 'index': 40,\n",
" 'word': '▁life',\n",
" 'start': 143,\n",
" 'end': 148},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7061166,\n",
" 'index': 41,\n",
" 'word': '▁on',\n",
" 'start': 148,\n",
" 'end': 151},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6738205,\n",
" 'index': 42,\n",
" 'word': '▁Mars',\n",
" 'start': 151,\n",
" 'end': 156},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66683656,\n",
" 'index': 43,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6961923,\n",
" 'index': 44,\n",
" 'word': '▁and',\n",
" 'start': 157,\n",
" 'end': 161},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7319619,\n",
" 'index': 45,\n",
" 'word': '▁that',\n",
" 'start': 161,\n",
" 'end': 166},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7237144,\n",
" 'index': 46,\n",
" 'word': '▁the',\n",
" 'start': 166,\n",
" 'end': 170},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6963166,\n",
" 'index': 47,\n",
" 'word': '▁face',\n",
" 'start': 170,\n",
" 'end': 175},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74168783,\n",
" 'index': 48,\n",
" 'word': '▁was',\n",
" 'start': 175,\n",
" 'end': 179},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71670306,\n",
" 'index': 49,\n",
" 'word': '▁',\n",
" 'start': 179,\n",
" 'end': 180},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7101172,\n",
" 'index': 50,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7373488,\n",
" 'index': 51,\n",
" 'word': '▁by',\n",
" 'start': 187,\n",
" 'end': 190},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7071672,\n",
" 'index': 52,\n",
" 'word': '▁alien',\n",
" 'start': 190,\n",
" 'end': 196},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70015,\n",
" 'index': 53,\n",
" 'word': 's',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6949053,\n",
" 'index': 54,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7373082,\n",
" 'index': 55,\n",
" 'word': '▁correct',\n",
" 'start': 198,\n",
" 'end': 206},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7566203,\n",
" 'index': 56,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.79566896,\n",
" 'index': 57,\n",
" 'word': '▁No',\n",
" 'start': 208,\n",
" 'end': 211},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7330964,\n",
" 'index': 58,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75868404,\n",
" 'index': 59,\n",
" 'word': '▁',\n",
" 'start': 212,\n",
" 'end': 213},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70785344,\n",
" 'index': 60,\n",
" 'word': 'twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6963004,\n",
" 'index': 61,\n",
" 'word': '▁five',\n",
" 'start': 219,\n",
" 'end': 224},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7040995,\n",
" 'index': 62,\n",
" 'word': '▁years',\n",
" 'start': 224,\n",
" 'end': 230},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7281579,\n",
" 'index': 63,\n",
" 'word': '▁ago',\n",
" 'start': 230,\n",
" 'end': 234},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7362324,\n",
" 'index': 64,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78284067,\n",
" 'index': 65,\n",
" 'word': '▁our',\n",
" 'start': 235,\n",
" 'end': 239},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7404312,\n",
" 'index': 66,\n",
" 'word': '▁Viking',\n",
" 'start': 239,\n",
" 'end': 246},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6920022,\n",
" 'index': 67,\n",
" 'word': '▁1',\n",
" 'start': 246,\n",
" 'end': 248},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.724633,\n",
" 'index': 68,\n",
" 'word': '▁space',\n",
" 'start': 248,\n",
" 'end': 254},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6913412,\n",
" 'index': 69,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7467937,\n",
" 'index': 70,\n",
" 'word': '▁was',\n",
" 'start': 259,\n",
" 'end': 263},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73295933,\n",
" 'index': 71,\n",
" 'word': '▁circ',\n",
" 'start': 263,\n",
" 'end': 268},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7470121,\n",
" 'index': 72,\n",
" 'word': 'ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7531431,\n",
" 'index': 73,\n",
" 'word': '▁the',\n",
" 'start': 272,\n",
" 'end': 276},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7101165,\n",
" 'index': 74,\n",
" 'word': '▁planet',\n",
" 'start': 276,\n",
" 'end': 283},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7100098,\n",
" 'index': 75,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7461217,\n",
" 'index': 76,\n",
" 'word': '▁snap',\n",
" 'start': 284,\n",
" 'end': 289},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.739955,\n",
" 'index': 77,\n",
" 'word': 'ping',\n",
" 'start': 289,\n",
" 'end': 293},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7169406,\n",
" 'index': 78,\n",
" 'word': '▁photos',\n",
" 'start': 293,\n",
" 'end': 300},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7049295,\n",
" 'index': 79,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7407019,\n",
" 'index': 80,\n",
" 'word': '▁',\n",
" 'start': 301,\n",
" 'end': 302},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7440167,\n",
" 'index': 81,\n",
" 'word': 'when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7350766,\n",
" 'index': 82,\n",
" 'word': '▁it',\n",
" 'start': 306,\n",
" 'end': 309},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7492218,\n",
" 'index': 83,\n",
" 'word': '▁spot',\n",
" 'start': 309,\n",
" 'end': 314},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7348358,\n",
" 'index': 84,\n",
" 'word': 'ted',\n",
" 'start': 314,\n",
" 'end': 317},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76565677,\n",
" 'index': 85,\n",
" 'word': '▁the',\n",
" 'start': 317,\n",
" 'end': 321},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73117274,\n",
" 'index': 86,\n",
" 'word': '▁',\n",
" 'start': 321,\n",
" 'end': 322},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7081199,\n",
" 'index': 87,\n",
" 'word': 'shadow',\n",
" 'start': 322,\n",
" 'end': 328},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7255247,\n",
" 'index': 88,\n",
" 'word': 'y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73350924,\n",
" 'index': 89,\n",
" 'word': '▁like',\n",
" 'start': 329,\n",
" 'end': 334},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72074175,\n",
" 'index': 90,\n",
" 'word': 'ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7409807,\n",
" 'index': 91,\n",
" 'word': '▁of',\n",
" 'start': 338,\n",
" 'end': 341},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7388687,\n",
" 'index': 92,\n",
" 'word': '▁',\n",
" 'start': 341,\n",
" 'end': 342},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73592776,\n",
" 'index': 93,\n",
" 'word': 'a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7441289,\n",
" 'index': 94,\n",
" 'word': '▁human',\n",
" 'start': 343,\n",
" 'end': 349},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73894185,\n",
" 'index': 95,\n",
" 'word': '▁face',\n",
" 'start': 349,\n",
" 'end': 354},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76816374,\n",
" 'index': 96,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8132516,\n",
" 'index': 97,\n",
" 'word': '▁Us',\n",
" 'start': 355,\n",
" 'end': 358},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7830225,\n",
" 'index': 98,\n",
" 'word': '▁',\n",
" 'start': 358,\n",
" 'end': 359},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7423716,\n",
" 'index': 99,\n",
" 'word': 'scientist',\n",
" 'start': 359,\n",
" 'end': 368},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72860986,\n",
" 'index': 100,\n",
" 'word': 's',\n",
" 'start': 368,\n",
" 'end': 369},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7603417,\n",
" 'index': 101,\n",
" 'word': '▁figure',\n",
" 'start': 369,\n",
" 'end': 376},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7233966,\n",
" 'index': 102,\n",
" 'word': 'd',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73558444,\n",
" 'index': 103,\n",
" 'word': '▁out',\n",
" 'start': 377,\n",
" 'end': 381},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74666375,\n",
" 'index': 104,\n",
" 'word': '▁that',\n",
" 'start': 381,\n",
" 'end': 386},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72817564,\n",
" 'index': 105,\n",
" 'word': '▁it',\n",
" 'start': 386,\n",
" 'end': 389},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7462587,\n",
" 'index': 106,\n",
" 'word': '▁was',\n",
" 'start': 389,\n",
" 'end': 393},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77071136,\n",
" 'index': 107,\n",
" 'word': '▁just',\n",
" 'start': 393,\n",
" 'end': 398},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7622161,\n",
" 'index': 108,\n",
" 'word': '▁',\n",
" 'start': 398,\n",
" 'end': 399},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7517658,\n",
" 'index': 109,\n",
" 'word': 'another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7740035,\n",
" 'index': 110,\n",
" 'word': '▁Marti',\n",
" 'start': 406,\n",
" 'end': 412},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7184144,\n",
" 'index': 111,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.697455,\n",
" 'index': 112,\n",
" 'word': '▁mesa',\n",
" 'start': 414,\n",
" 'end': 419},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7242985,\n",
" 'index': 113,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.729316,\n",
" 'index': 114,\n",
" 'word': '▁common',\n",
" 'start': 420,\n",
" 'end': 427},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7581685,\n",
" 'index': 115,\n",
" 'word': '▁around',\n",
" 'start': 427,\n",
" 'end': 434},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75228137,\n",
" 'index': 116,\n",
" 'word': '▁Cy',\n",
" 'start': 434,\n",
" 'end': 437},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73617876,\n",
" 'index': 117,\n",
" 'word': 'doni',\n",
" 'start': 437,\n",
" 'end': 441},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.694593,\n",
" 'index': 118,\n",
" 'word': 'a',\n",
" 'start': 441,\n",
" 'end': 442},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71767867,\n",
" 'index': 119,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7295897,\n",
" 'index': 120,\n",
" 'word': '▁only',\n",
" 'start': 443,\n",
" 'end': 448},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73233503,\n",
" 'index': 121,\n",
" 'word': '▁this',\n",
" 'start': 448,\n",
" 'end': 453},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7232187,\n",
" 'index': 122,\n",
" 'word': '▁one',\n",
" 'start': 453,\n",
" 'end': 457},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74401796,\n",
" 'index': 123,\n",
" 'word': '▁had',\n",
" 'start': 457,\n",
" 'end': 461},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7337583,\n",
" 'index': 124,\n",
" 'word': '▁',\n",
" 'start': 461,\n",
" 'end': 462},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7196742,\n",
" 'index': 125,\n",
" 'word': 'shadow',\n",
" 'start': 462,\n",
" 'end': 468},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7004562,\n",
" 'index': 126,\n",
" 'word': 's',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7275282,\n",
" 'index': 127,\n",
" 'word': '▁that',\n",
" 'start': 469,\n",
" 'end': 474},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.729493,\n",
" 'index': 128,\n",
" 'word': '▁made',\n",
" 'start': 474,\n",
" 'end': 479},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7232314,\n",
" 'index': 129,\n",
" 'word': '▁it',\n",
" 'start': 479,\n",
" 'end': 482},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72791636,\n",
" 'index': 130,\n",
" 'word': '▁look',\n",
" 'start': 482,\n",
" 'end': 487},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73750484,\n",
" 'index': 131,\n",
" 'word': '▁like',\n",
" 'start': 487,\n",
" 'end': 492},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7305402,\n",
" 'index': 132,\n",
" 'word': '▁an',\n",
" 'start': 492,\n",
" 'end': 495},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7322336,\n",
" 'index': 133,\n",
" 'word': '▁Egypt',\n",
" 'start': 495,\n",
" 'end': 501},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7042777,\n",
" 'index': 134,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70994717,\n",
" 'index': 135,\n",
" 'word': '▁Phar',\n",
" 'start': 504,\n",
" 'end': 509},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.702662,\n",
" 'index': 136,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70684516,\n",
" 'index': 137,\n",
" 'word': 'o',\n",
" 'start': 510,\n",
" 'end': 511},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7180575,\n",
" 'index': 138,\n",
" 'word': 'h',\n",
" 'start': 511,\n",
" 'end': 512},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7585362,\n",
" 'index': 139,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78797036,\n",
" 'index': 140,\n",
" 'word': '▁',\n",
" 'start': 513,\n",
" 'end': 514},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77854466,\n",
" 'index': 141,\n",
" 'word': 'Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76023924,\n",
" 'index': 142,\n",
" 'word': '▁',\n",
" 'start': 518,\n",
" 'end': 519},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76562846,\n",
" 'index': 143,\n",
" 'word': 'few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7606485,\n",
" 'index': 144,\n",
" 'word': '▁days',\n",
" 'start': 522,\n",
" 'end': 527},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7725775,\n",
" 'index': 145,\n",
" 'word': '▁later',\n",
" 'start': 527,\n",
" 'end': 533},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7549871,\n",
" 'index': 146,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7813583,\n",
" 'index': 147,\n",
" 'word': '▁we',\n",
" 'start': 534,\n",
" 'end': 537},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73188114,\n",
" 'index': 148,\n",
" 'word': '▁',\n",
" 'start': 537,\n",
" 'end': 538},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72285676,\n",
" 'index': 149,\n",
" 'word': 'reveal',\n",
" 'start': 538,\n",
" 'end': 544},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73699045,\n",
" 'index': 150,\n",
" 'word': 'ed',\n",
" 'start': 544,\n",
" 'end': 546},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.732305,\n",
" 'index': 151,\n",
" 'word': '▁the',\n",
" 'start': 546,\n",
" 'end': 550},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71078587,\n",
" 'index': 152,\n",
" 'word': '▁image',\n",
" 'start': 550,\n",
" 'end': 556},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7434471,\n",
" 'index': 153,\n",
" 'word': '▁for',\n",
" 'start': 556,\n",
" 'end': 560},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7312815,\n",
" 'index': 154,\n",
" 'word': '▁all',\n",
" 'start': 560,\n",
" 'end': 564},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74657434,\n",
" 'index': 155,\n",
" 'word': '▁to',\n",
" 'start': 564,\n",
" 'end': 567},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7268362,\n",
" 'index': 156,\n",
" 'word': '▁see',\n",
" 'start': 567,\n",
" 'end': 571},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7400499,\n",
" 'index': 157,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7625019,\n",
" 'index': 158,\n",
" 'word': '▁and',\n",
" 'start': 572,\n",
" 'end': 576},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7985083,\n",
" 'index': 159,\n",
" 'word': '▁we',\n",
" 'start': 576,\n",
" 'end': 579},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77577597,\n",
" 'index': 160,\n",
" 'word': '▁made',\n",
" 'start': 579,\n",
" 'end': 584},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78290325,\n",
" 'index': 161,\n",
" 'word': '▁sure',\n",
" 'start': 584,\n",
" 'end': 589},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76356405,\n",
" 'index': 162,\n",
" 'word': '▁to',\n",
" 'start': 589,\n",
" 'end': 592},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73777944,\n",
" 'index': 163,\n",
" 'word': '▁note',\n",
" 'start': 592,\n",
" 'end': 597},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76957136,\n",
" 'index': 164,\n",
" 'word': '▁that',\n",
" 'start': 597,\n",
" 'end': 602},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76147383,\n",
" 'index': 165,\n",
" 'word': '▁it',\n",
" 'start': 602,\n",
" 'end': 605},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78018135,\n",
" 'index': 166,\n",
" 'word': '▁was',\n",
" 'start': 605,\n",
" 'end': 609},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76262224,\n",
" 'index': 167,\n",
" 'word': '▁',\n",
" 'start': 609,\n",
" 'end': 610},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7494763,\n",
" 'index': 168,\n",
" 'word': 'a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76614785,\n",
" 'index': 169,\n",
" 'word': '▁',\n",
" 'start': 611,\n",
" 'end': 612},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75811285,\n",
" 'index': 170,\n",
" 'word': 'huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74348,\n",
" 'index': 171,\n",
" 'word': '▁rock',\n",
" 'start': 616,\n",
" 'end': 621},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73305815,\n",
" 'index': 172,\n",
" 'word': '▁formation',\n",
" 'start': 621,\n",
" 'end': 631},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74695593,\n",
" 'index': 173,\n",
" 'word': '▁that',\n",
" 'start': 631,\n",
" 'end': 636},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7620345,\n",
" 'index': 174,\n",
" 'word': '▁just',\n",
" 'start': 636,\n",
" 'end': 641},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7426398,\n",
" 'index': 175,\n",
" 'word': '▁rese',\n",
" 'start': 641,\n",
" 'end': 646},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7361759,\n",
" 'index': 176,\n",
" 'word': 'mbled',\n",
" 'start': 646,\n",
" 'end': 651},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72064775,\n",
" 'index': 177,\n",
" 'word': '▁',\n",
" 'start': 651,\n",
" 'end': 652},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7398345,\n",
" 'index': 178,\n",
" 'word': 'a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7524488,\n",
" 'index': 179,\n",
" 'word': '▁human',\n",
" 'start': 653,\n",
" 'end': 659},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7174149,\n",
" 'index': 180,\n",
" 'word': '▁head',\n",
" 'start': 659,\n",
" 'end': 664},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7290167,\n",
" 'index': 181,\n",
" 'word': '▁and',\n",
" 'start': 664,\n",
" 'end': 668},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7181655,\n",
" 'index': 182,\n",
" 'word': '▁face',\n",
" 'start': 668,\n",
" 'end': 673},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73066306,\n",
" 'index': 183,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7215076,\n",
" 'index': 184,\n",
" 'word': '▁but',\n",
" 'start': 674,\n",
" 'end': 678},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7252736,\n",
" 'index': 185,\n",
" 'word': '▁all',\n",
" 'start': 678,\n",
" 'end': 682},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72834283,\n",
" 'index': 186,\n",
" 'word': '▁of',\n",
" 'start': 682,\n",
" 'end': 685},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.753237,\n",
" 'index': 187,\n",
" 'word': '▁it',\n",
" 'start': 685,\n",
" 'end': 688},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76431835,\n",
" 'index': 188,\n",
" 'word': '▁was',\n",
" 'start': 688,\n",
" 'end': 692},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7625411,\n",
" 'index': 189,\n",
" 'word': '▁',\n",
" 'start': 692,\n",
" 'end': 693},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76240027,\n",
" 'index': 190,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75998336,\n",
" 'index': 191,\n",
" 'word': '▁by',\n",
" 'start': 699,\n",
" 'end': 702},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7409608,\n",
" 'index': 192,\n",
" 'word': '▁',\n",
" 'start': 702,\n",
" 'end': 703},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72436345,\n",
" 'index': 193,\n",
" 'word': 'shadow',\n",
" 'start': 703,\n",
" 'end': 709},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7190035,\n",
" 'index': 194,\n",
" 'word': 's',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78103834,\n",
" 'index': 195,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7606551,\n",
" 'index': 196,\n",
" 'word': '▁We',\n",
" 'start': 711,\n",
" 'end': 714},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7695547,\n",
" 'index': 197,\n",
" 'word': '▁only',\n",
" 'start': 714,\n",
" 'end': 719},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75947803,\n",
" 'index': 198,\n",
" 'word': '▁',\n",
" 'start': 719,\n",
" 'end': 720},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.713467,\n",
" 'index': 199,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6898313,\n",
" 'index': 200,\n",
" 'word': '▁it',\n",
" 'start': 729,\n",
" 'end': 732},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72763705,\n",
" 'index': 201,\n",
" 'word': '▁',\n",
" 'start': 732,\n",
" 'end': 733},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74799156,\n",
" 'index': 202,\n",
" 'word': 'because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7443309,\n",
" 'index': 203,\n",
" 'word': '▁we',\n",
" 'start': 740,\n",
" 'end': 743},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7246991,\n",
" 'index': 204,\n",
" 'word': '▁thought',\n",
" 'start': 743,\n",
" 'end': 751},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6987896,\n",
" 'index': 205,\n",
" 'word': '▁it',\n",
" 'start': 751,\n",
" 'end': 754},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70192343,\n",
" 'index': 206,\n",
" 'word': '▁',\n",
" 'start': 754,\n",
" 'end': 755},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6921032,\n",
" 'index': 207,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7164251,\n",
" 'index': 208,\n",
" 'word': '▁be',\n",
" 'start': 760,\n",
" 'end': 763},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6848709,\n",
" 'index': 209,\n",
" 'word': '▁',\n",
" 'start': 763,\n",
" 'end': 764},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7095274,\n",
" 'index': 210,\n",
" 'word': 'a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7103807,\n",
" 'index': 211,\n",
" 'word': '▁good',\n",
" 'start': 765,\n",
" 'end': 770},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7356097,\n",
" 'index': 212,\n",
" 'word': '▁way',\n",
" 'start': 770,\n",
" 'end': 774},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75338656,\n",
" 'index': 213,\n",
" 'word': '▁to',\n",
" 'start': 774,\n",
" 'end': 777},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74160135,\n",
" 'index': 214,\n",
" 'word': '▁engage',\n",
" 'start': 777,\n",
" 'end': 784},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78099227,\n",
" 'index': 215,\n",
" 'word': '▁the',\n",
" 'start': 784,\n",
" 'end': 788},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74460626,\n",
" 'index': 216,\n",
" 'word': '▁public',\n",
" 'start': 788,\n",
" 'end': 795},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7796549,\n",
" 'index': 217,\n",
" 'word': '▁with',\n",
" 'start': 795,\n",
" 'end': 800},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.735151,\n",
" 'index': 218,\n",
" 'word': '▁NASA',\n",
" 'start': 800,\n",
" 'end': 805},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73245096,\n",
" 'index': 219,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7472554,\n",
" 'index': 220,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7569386,\n",
" 'index': 221,\n",
" 'word': '▁',\n",
" 'start': 807,\n",
" 'end': 808},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73314124,\n",
" 'index': 222,\n",
" 'word': 'finding',\n",
" 'start': 808,\n",
" 'end': 815},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7164539,\n",
" 'index': 223,\n",
" 'word': 's',\n",
" 'start': 815,\n",
" 'end': 816},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72332114,\n",
" 'index': 224,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70425224,\n",
" 'index': 225,\n",
" 'word': '▁and',\n",
" 'start': 817,\n",
" 'end': 821},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73506117,\n",
" 'index': 226,\n",
" 'word': '▁at',\n",
" 'start': 821,\n",
" 'end': 824},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64045215,\n",
" 'index': 227,\n",
" 'word': 'rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6795728,\n",
" 'index': 228,\n",
" 'word': 'c',\n",
" 'start': 827,\n",
" 'end': 828},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6943617,\n",
" 'index': 229,\n",
" 'word': 't',\n",
" 'start': 828,\n",
" 'end': 829},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.712106,\n",
" 'index': 230,\n",
" 'word': '▁attention',\n",
" 'start': 829,\n",
" 'end': 839},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72840285,\n",
" 'index': 231,\n",
" 'word': '▁to',\n",
" 'start': 839,\n",
" 'end': 842},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6708754,\n",
" 'index': 232,\n",
" 'word': '▁Mars',\n",
" 'start': 842,\n",
" 'end': 847},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66703403,\n",
" 'index': 233,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6601703,\n",
" 'index': 234,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71845216,\n",
" 'index': 235,\n",
" 'word': '▁and',\n",
" 'start': 849,\n",
" 'end': 853},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6982402,\n",
" 'index': 236,\n",
" 'word': '▁it',\n",
" 'start': 853,\n",
" 'end': 856},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78411067,\n",
" 'index': 237,\n",
" 'word': '▁did',\n",
" 'start': 856,\n",
" 'end': 860},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7109891,\n",
" 'index': 238,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77790356,\n",
" 'index': 239,\n",
" 'word': '▁The',\n",
" 'start': 862,\n",
" 'end': 866},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7041047,\n",
" 'index': 240,\n",
" 'word': '▁face',\n",
" 'start': 866,\n",
" 'end': 871},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70027363,\n",
" 'index': 241,\n",
" 'word': '▁on',\n",
" 'start': 871,\n",
" 'end': 874},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.67925966,\n",
" 'index': 242,\n",
" 'word': '▁Mars',\n",
" 'start': 874,\n",
" 'end': 879},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7132371,\n",
" 'index': 243,\n",
" 'word': '▁soon',\n",
" 'start': 879,\n",
" 'end': 884},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70119303,\n",
" 'index': 244,\n",
" 'word': '▁b',\n",
" 'start': 884,\n",
" 'end': 886},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7086833,\n",
" 'index': 245,\n",
" 'word': 'ecame',\n",
" 'start': 886,\n",
" 'end': 891},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71190226,\n",
" 'index': 246,\n",
" 'word': '▁',\n",
" 'start': 891,\n",
" 'end': 892},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.721647,\n",
" 'index': 247,\n",
" 'word': 'a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71419626,\n",
" 'index': 248,\n",
" 'word': '▁pop',\n",
" 'start': 893,\n",
" 'end': 897},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7087096,\n",
" 'index': 249,\n",
" 'word': '▁',\n",
" 'start': 897,\n",
" 'end': 898},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6964584,\n",
" 'index': 250,\n",
" 'word': 'icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70615745,\n",
" 'index': 251,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7585035,\n",
" 'index': 252,\n",
" 'word': '▁shot',\n",
" 'start': 903,\n",
" 'end': 908},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76861167,\n",
" 'index': 253,\n",
" 'word': '▁in',\n",
" 'start': 908,\n",
" 'end': 911},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7368769,\n",
" 'index': 254,\n",
" 'word': '▁movies',\n",
" 'start': 911,\n",
" 'end': 918},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71782357,\n",
" 'index': 255,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74544686,\n",
" 'index': 256,\n",
" 'word': '▁',\n",
" 'start': 919,\n",
" 'end': 920},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74319994,\n",
" 'index': 257,\n",
" 'word': 'appear',\n",
" 'start': 920,\n",
" 'end': 926},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7328243,\n",
" 'index': 258,\n",
" 'word': 'ed',\n",
" 'start': 926,\n",
" 'end': 928},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75824165,\n",
" 'index': 259,\n",
" 'word': '▁in',\n",
" 'start': 928,\n",
" 'end': 931},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73930544,\n",
" 'index': 260,\n",
" 'word': '▁books',\n",
" 'start': 931,\n",
" 'end': 937},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7212368,\n",
" 'index': 261,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7187186,\n",
" 'index': 262,\n",
" 'word': '▁magazine',\n",
" 'start': 938,\n",
" 'end': 947},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7175941,\n",
" 'index': 263,\n",
" 'word': 's',\n",
" 'start': 947,\n",
" 'end': 948},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7070163,\n",
" 'index': 264,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7512239,\n",
" 'index': 265,\n",
" 'word': '▁radio',\n",
" 'start': 949,\n",
" 'end': 955},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7268631,\n",
" 'index': 266,\n",
" 'word': '▁talk',\n",
" 'start': 955,\n",
" 'end': 960},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6982043,\n",
" 'index': 267,\n",
" 'word': '▁',\n",
" 'start': 960,\n",
" 'end': 961},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7089483,\n",
" 'index': 268,\n",
" 'word': 'shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6838974,\n",
" 'index': 269,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69852626,\n",
" 'index': 270,\n",
" 'word': '▁and',\n",
" 'start': 967,\n",
" 'end': 971},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72184855,\n",
" 'index': 271,\n",
" 'word': '▁ha',\n",
" 'start': 971,\n",
" 'end': 974},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70426184,\n",
" 'index': 272,\n",
" 'word': 'unted',\n",
" 'start': 974,\n",
" 'end': 979},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7255174,\n",
" 'index': 273,\n",
" 'word': '▁gro',\n",
" 'start': 979,\n",
" 'end': 983},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73427945,\n",
" 'index': 274,\n",
" 'word': 'cery',\n",
" 'start': 983,\n",
" 'end': 987},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7239857,\n",
" 'index': 275,\n",
" 'word': '▁store',\n",
" 'start': 987,\n",
" 'end': 993},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70565,\n",
" 'index': 276,\n",
" 'word': '▁',\n",
" 'start': 993,\n",
" 'end': 994},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6898292,\n",
" 'index': 277,\n",
" 'word': 'checkout',\n",
" 'start': 994,\n",
" 'end': 1002},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69602674,\n",
" 'index': 278,\n",
" 'word': '▁',\n",
" 'start': 1002,\n",
" 'end': 1003},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6891,\n",
" 'index': 279,\n",
" 'word': 'lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73144776,\n",
" 'index': 280,\n",
" 'word': '▁for',\n",
" 'start': 1008,\n",
" 'end': 1012},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6338651,\n",
" 'index': 281,\n",
" 'word': '▁25',\n",
" 'start': 1012,\n",
" 'end': 1015},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7095752,\n",
" 'index': 282,\n",
" 'word': '▁years',\n",
" 'start': 1015,\n",
" 'end': 1021},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73906696,\n",
" 'index': 283,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8037943,\n",
" 'index': 284,\n",
" 'word': '▁Some',\n",
" 'start': 1022,\n",
" 'end': 1027},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77938217,\n",
" 'index': 285,\n",
" 'word': '▁people',\n",
" 'start': 1027,\n",
" 'end': 1034},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.762757,\n",
" 'index': 286,\n",
" 'word': '▁thought',\n",
" 'start': 1034,\n",
" 'end': 1042},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8229662,\n",
" 'index': 287,\n",
" 'word': '▁the',\n",
" 'start': 1042,\n",
" 'end': 1046},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7882584,\n",
" 'index': 288,\n",
" 'word': '▁natural',\n",
" 'start': 1046,\n",
" 'end': 1054},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7520508,\n",
" 'index': 289,\n",
" 'word': '▁land',\n",
" 'start': 1054,\n",
" 'end': 1059},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73779494,\n",
" 'index': 290,\n",
" 'word': 'form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7814589,\n",
" 'index': 291,\n",
" 'word': '▁was',\n",
" 'start': 1063,\n",
" 'end': 1067},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74082786,\n",
" 'index': 292,\n",
" 'word': '▁',\n",
" 'start': 1067,\n",
" 'end': 1068},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7207082,\n",
" 'index': 293,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7479461,\n",
" 'index': 294,\n",
" 'word': '▁of',\n",
" 'start': 1076,\n",
" 'end': 1079},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.755139,\n",
" 'index': 295,\n",
" 'word': '▁life',\n",
" 'start': 1079,\n",
" 'end': 1084},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7355991,\n",
" 'index': 296,\n",
" 'word': '▁on',\n",
" 'start': 1084,\n",
" 'end': 1087},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7187748,\n",
" 'index': 297,\n",
" 'word': '▁Mars',\n",
" 'start': 1087,\n",
" 'end': 1092},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7418989,\n",
" 'index': 298,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73773444,\n",
" 'index': 299,\n",
" 'word': '▁and',\n",
" 'start': 1093,\n",
" 'end': 1097},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7697259,\n",
" 'index': 300,\n",
" 'word': '▁that',\n",
" 'start': 1097,\n",
" 'end': 1102},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75266504,\n",
" 'index': 301,\n",
" 'word': '▁',\n",
" 'start': 1102,\n",
" 'end': 1103},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7332634,\n",
" 'index': 302,\n",
" 'word': 'us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7624355,\n",
" 'index': 303,\n",
" 'word': '▁',\n",
" 'start': 1105,\n",
" 'end': 1106},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7445762,\n",
" 'index': 304,\n",
" 'word': 'scientist',\n",
" 'start': 1106,\n",
" 'end': 1115},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72726446,\n",
" 'index': 305,\n",
" 'word': 's',\n",
" 'start': 1115,\n",
" 'end': 1116},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74028957,\n",
" 'index': 306,\n",
" 'word': '▁',\n",
" 'start': 1116,\n",
" 'end': 1117},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7528527,\n",
" 'index': 307,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7751754,\n",
" 'index': 308,\n",
" 'word': '▁to',\n",
" 'start': 1123,\n",
" 'end': 1126},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7538359,\n",
" 'index': 309,\n",
" 'word': '▁',\n",
" 'start': 1126,\n",
" 'end': 1127},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72743875,\n",
" 'index': 310,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74001145,\n",
" 'index': 311,\n",
" 'word': '▁it',\n",
" 'start': 1131,\n",
" 'end': 1134},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7524098,\n",
" 'index': 312,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.80662686,\n",
" 'index': 313,\n",
" 'word': '▁but',\n",
" 'start': 1135,\n",
" 'end': 1139},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78448457,\n",
" 'index': 314,\n",
" 'word': '▁',\n",
" 'start': 1139,\n",
" 'end': 1140},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70067596,\n",
" 'index': 315,\n",
" 'word': 'really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73782057,\n",
" 'index': 316,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78326744,\n",
" 'index': 317,\n",
" 'word': '▁the',\n",
" 'start': 1147,\n",
" 'end': 1151},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7770973,\n",
" 'index': 318,\n",
" 'word': '▁',\n",
" 'start': 1151,\n",
" 'end': 1152},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72133195,\n",
" 'index': 319,\n",
" 'word': 'defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73959935,\n",
" 'index': 320,\n",
" 'word': 's',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76056314,\n",
" 'index': 321,\n",
" 'word': '▁of',\n",
" 'start': 1161,\n",
" 'end': 1164},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7938618,\n",
" 'index': 322,\n",
" 'word': '▁the',\n",
" 'start': 1164,\n",
" 'end': 1168},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.752001,\n",
" 'index': 323,\n",
" 'word': '▁NASA',\n",
" 'start': 1168,\n",
" 'end': 1173},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72408575,\n",
" 'index': 324,\n",
" 'word': '▁budget',\n",
" 'start': 1173,\n",
" 'end': 1180},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70957243,\n",
" 'index': 325,\n",
" 'word': '▁wish',\n",
" 'start': 1180,\n",
" 'end': 1185},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7567643,\n",
" 'index': 326,\n",
" 'word': '▁there',\n",
" 'start': 1185,\n",
" 'end': 1191},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75911105,\n",
" 'index': 327,\n",
" 'word': '▁was',\n",
" 'start': 1191,\n",
" 'end': 1195},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76012385,\n",
" 'index': 328,\n",
" 'word': '▁',\n",
" 'start': 1195,\n",
" 'end': 1196},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.732424,\n",
" 'index': 329,\n",
" 'word': 'ancient',\n",
" 'start': 1196,\n",
" 'end': 1203},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73175794,\n",
" 'index': 330,\n",
" 'word': '▁c',\n",
" 'start': 1203,\n",
" 'end': 1205},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7199019,\n",
" 'index': 331,\n",
" 'word': 'ivilization',\n",
" 'start': 1205,\n",
" 'end': 1216},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7307094,\n",
" 'index': 332,\n",
" 'word': '▁on',\n",
" 'start': 1216,\n",
" 'end': 1219},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7247836,\n",
" 'index': 333,\n",
" 'word': '▁Mars',\n",
" 'start': 1219,\n",
" 'end': 1224},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7309902,\n",
" 'index': 334,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7614061,\n",
" 'index': 335,\n",
" 'word': '▁We',\n",
" 'start': 1225,\n",
" 'end': 1228},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72096175,\n",
" 'index': 336,\n",
" 'word': '▁decide',\n",
" 'start': 1228,\n",
" 'end': 1235},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69636214,\n",
" 'index': 337,\n",
" 'word': 'd',\n",
" 'start': 1235,\n",
" 'end': 1236},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7201302,\n",
" 'index': 338,\n",
" 'word': '▁to',\n",
" 'start': 1236,\n",
" 'end': 1239},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71916217,\n",
" 'index': 339,\n",
" 'word': '▁take',\n",
" 'start': 1239,\n",
" 'end': 1244},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75461584,\n",
" 'index': 340,\n",
" 'word': '▁',\n",
" 'start': 1244,\n",
" 'end': 1245},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7655589,\n",
" 'index': 341,\n",
" 'word': 'another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7383289,\n",
" 'index': 342,\n",
" 'word': '▁shot',\n",
" 'start': 1252,\n",
" 'end': 1257},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76632655,\n",
" 'index': 343,\n",
" 'word': '▁just',\n",
" 'start': 1257,\n",
" 'end': 1262},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7247141,\n",
" 'index': 344,\n",
" 'word': '▁to',\n",
" 'start': 1262,\n",
" 'end': 1265},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73396194,\n",
" 'index': 345,\n",
" 'word': '▁make',\n",
" 'start': 1265,\n",
" 'end': 1270},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75854856,\n",
" 'index': 346,\n",
" 'word': '▁sure',\n",
" 'start': 1270,\n",
" 'end': 1275},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76170975,\n",
" 'index': 347,\n",
" 'word': '▁we',\n",
" 'start': 1275,\n",
" 'end': 1278},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75737375,\n",
" 'index': 348,\n",
" 'word': '▁were',\n",
" 'start': 1278,\n",
" 'end': 1283},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7405633,\n",
" 'index': 349,\n",
" 'word': 'n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7373094,\n",
" 'index': 350,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74561375,\n",
" 'index': 351,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7152604,\n",
" 'index': 352,\n",
" 'word': '▁wrong',\n",
" 'start': 1286,\n",
" 'end': 1292},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71036845,\n",
" 'index': 353,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7824172,\n",
" 'index': 354,\n",
" 'word': '▁on',\n",
" 'start': 1293,\n",
" 'end': 1296},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76754534,\n",
" 'index': 355,\n",
" 'word': '▁April',\n",
" 'start': 1296,\n",
" 'end': 1302},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7371393,\n",
" 'index': 356,\n",
" 'word': '▁5,',\n",
" 'start': 1302,\n",
" 'end': 1305},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6837726,\n",
" 'index': 357,\n",
" 'word': '▁1998',\n",
" 'start': 1305,\n",
" 'end': 1310},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7280978,\n",
" 'index': 358,\n",
" 'word': '.',\n",
" 'start': 1310,\n",
" 'end': 1311},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7754057,\n",
" 'index': 359,\n",
" 'word': '▁Michael',\n",
" 'start': 1311,\n",
" 'end': 1319},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6482514,\n",
" 'index': 360,\n",
" 'word': '▁Malin',\n",
" 'start': 1319,\n",
" 'end': 1325},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70006573,\n",
" 'index': 361,\n",
" 'word': '▁and',\n",
" 'start': 1325,\n",
" 'end': 1329},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7558913,\n",
" 'index': 362,\n",
" 'word': '▁his',\n",
" 'start': 1329,\n",
" 'end': 1333},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7198912,\n",
" 'index': 363,\n",
" 'word': '▁Mars',\n",
" 'start': 1333,\n",
" 'end': 1338},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7419942,\n",
" 'index': 364,\n",
" 'word': '▁',\n",
" 'start': 1338,\n",
" 'end': 1339},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72979295,\n",
" 'index': 365,\n",
" 'word': 'Orbit',\n",
" 'start': 1339,\n",
" 'end': 1344},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7115055,\n",
" 'index': 366,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7362249,\n",
" 'index': 367,\n",
" 'word': '▁camera',\n",
" 'start': 1346,\n",
" 'end': 1353},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6770409,\n",
" 'index': 368,\n",
" 'word': '▁team',\n",
" 'start': 1353,\n",
" 'end': 1358},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72247416,\n",
" 'index': 369,\n",
" 'word': '▁',\n",
" 'start': 1358,\n",
" 'end': 1359},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7055674,\n",
" 'index': 370,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73880804,\n",
" 'index': 371,\n",
" 'word': '▁',\n",
" 'start': 1363,\n",
" 'end': 1364},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71011674,\n",
" 'index': 372,\n",
" 'word': 'a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6853782,\n",
" 'index': 373,\n",
" 'word': '▁picture',\n",
" 'start': 1365,\n",
" 'end': 1373},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7360565,\n",
" 'index': 374,\n",
" 'word': '▁that',\n",
" 'start': 1373,\n",
" 'end': 1378},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7494284,\n",
" 'index': 375,\n",
" 'word': '▁was',\n",
" 'start': 1378,\n",
" 'end': 1382},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7195536,\n",
" 'index': 376,\n",
" 'word': '▁ten',\n",
" 'start': 1382,\n",
" 'end': 1386},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75279045,\n",
" 'index': 377,\n",
" 'word': '▁times',\n",
" 'start': 1386,\n",
" 'end': 1392},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7426931,\n",
" 'index': 378,\n",
" 'word': '▁',\n",
" 'start': 1392,\n",
" 'end': 1393},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72122455,\n",
" 'index': 379,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7237729,\n",
" 'index': 380,\n",
" 'word': 'er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76766646,\n",
" 'index': 381,\n",
" 'word': '▁than',\n",
" 'start': 1400,\n",
" 'end': 1405},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77287704,\n",
" 'index': 382,\n",
" 'word': '▁the',\n",
" 'start': 1405,\n",
" 'end': 1409},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77964455,\n",
" 'index': 383,\n",
" 'word': '▁original',\n",
" 'start': 1409,\n",
" 'end': 1418},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7435452,\n",
" 'index': 384,\n",
" 'word': '▁Viking',\n",
" 'start': 1418,\n",
" 'end': 1425},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7510399,\n",
" 'index': 385,\n",
" 'word': '▁photos',\n",
" 'start': 1425,\n",
" 'end': 1432},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71882915,\n",
" 'index': 386,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7779149,\n",
" 'index': 387,\n",
" 'word': '▁',\n",
" 'start': 1433,\n",
" 'end': 1434},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7555608,\n",
" 'index': 388,\n",
" 'word': 'reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75253564,\n",
" 'index': 389,\n",
" 'word': 'ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76151603,\n",
" 'index': 390,\n",
" 'word': '▁',\n",
" 'start': 1443,\n",
" 'end': 1444},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74064636,\n",
" 'index': 391,\n",
" 'word': 'a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75289667,\n",
" 'index': 392,\n",
" 'word': '▁natural',\n",
" 'start': 1445,\n",
" 'end': 1453},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7026849,\n",
" 'index': 393,\n",
" 'word': '▁land',\n",
" 'start': 1453,\n",
" 'end': 1458},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71034503,\n",
" 'index': 394,\n",
" 'word': 'form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7105492,\n",
" 'index': 395,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73492867,\n",
" 'index': 396,\n",
" 'word': '▁',\n",
" 'start': 1463,\n",
" 'end': 1464},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73296964,\n",
" 'index': 397,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.749125,\n",
" 'index': 398,\n",
" 'word': '▁me',\n",
" 'start': 1469,\n",
" 'end': 1472},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7666059,\n",
" 'index': 399,\n",
" 'word': 'ant',\n",
" 'start': 1472,\n",
" 'end': 1475},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78626037,\n",
" 'index': 400,\n",
" 'word': '▁no',\n",
" 'start': 1475,\n",
" 'end': 1478},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7852426,\n",
" 'index': 401,\n",
" 'word': '▁alien',\n",
" 'start': 1478,\n",
" 'end': 1484},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7328261,\n",
" 'index': 402,\n",
" 'word': '▁monument',\n",
" 'start': 1484,\n",
" 'end': 1493},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75887156,\n",
" 'index': 403,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7391275,\n",
" 'index': 404,\n",
" 'word': '▁\"',\n",
" 'start': 1494,\n",
" 'end': 1496},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7504874,\n",
" 'index': 405,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7821028,\n",
" 'index': 406,\n",
" 'word': '▁that',\n",
" 'start': 1499,\n",
" 'end': 1504},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72317517,\n",
" 'index': 407,\n",
" 'word': '▁picture',\n",
" 'start': 1504,\n",
" 'end': 1512},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.760265,\n",
" 'index': 408,\n",
" 'word': '▁was',\n",
" 'start': 1512,\n",
" 'end': 1516},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7557999,\n",
" 'index': 409,\n",
" 'word': 'n',\n",
" 'start': 1516,\n",
" 'end': 1517},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7516783,\n",
" 'index': 410,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.767745,\n",
" 'index': 411,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70629925,\n",
" 'index': 412,\n",
" 'word': '▁',\n",
" 'start': 1519,\n",
" 'end': 1520},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7736552,\n",
" 'index': 413,\n",
" 'word': 'very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7615671,\n",
" 'index': 414,\n",
" 'word': '▁clear',\n",
" 'start': 1524,\n",
" 'end': 1530},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7786598,\n",
" 'index': 415,\n",
" 'word': '▁at',\n",
" 'start': 1530,\n",
" 'end': 1533},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76966304,\n",
" 'index': 416,\n",
" 'word': '▁all',\n",
" 'start': 1533,\n",
" 'end': 1537},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7294976,\n",
" 'index': 417,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7374973,\n",
" 'index': 418,\n",
" 'word': '▁',\n",
" 'start': 1538,\n",
" 'end': 1539},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7297504,\n",
" 'index': 419,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7293893,\n",
" 'index': 420,\n",
" 'word': '▁',\n",
" 'start': 1544,\n",
" 'end': 1545},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7144947,\n",
" 'index': 421,\n",
" 'word': 'could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7338257,\n",
" 'index': 422,\n",
" 'word': '▁mean',\n",
" 'start': 1550,\n",
" 'end': 1555},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76099664,\n",
" 'index': 423,\n",
" 'word': '▁alien',\n",
" 'start': 1555,\n",
" 'end': 1561},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7559823,\n",
" 'index': 424,\n",
" 'word': '▁',\n",
" 'start': 1561,\n",
" 'end': 1562},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7338846,\n",
" 'index': 425,\n",
" 'word': 'marking',\n",
" 'start': 1562,\n",
" 'end': 1569},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7261454,\n",
" 'index': 426,\n",
" 'word': 's',\n",
" 'start': 1569,\n",
" 'end': 1570},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74386823,\n",
" 'index': 427,\n",
" 'word': '▁were',\n",
" 'start': 1570,\n",
" 'end': 1575},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7308139,\n",
" 'index': 428,\n",
" 'word': '▁',\n",
" 'start': 1575,\n",
" 'end': 1576},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7243498,\n",
" 'index': 429,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7429684,\n",
" 'index': 430,\n",
" 'word': '▁by',\n",
" 'start': 1582,\n",
" 'end': 1585},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72541994,\n",
" 'index': 431,\n",
" 'word': '▁ha',\n",
" 'start': 1585,\n",
" 'end': 1588},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6984805,\n",
" 'index': 432,\n",
" 'word': 'ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73156893,\n",
" 'index': 433,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.82595235,\n",
" 'index': 434,\n",
" 'word': '▁Well',\n",
" 'start': 1591,\n",
" 'end': 1596},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78789514,\n",
" 'index': 435,\n",
" 'word': '▁no',\n",
" 'start': 1596,\n",
" 'end': 1599},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7215853,\n",
" 'index': 436,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7419579,\n",
" 'index': 437,\n",
" 'word': '▁yes',\n",
" 'start': 1600,\n",
" 'end': 1604},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7984193,\n",
" 'index': 438,\n",
" 'word': '▁that',\n",
" 'start': 1604,\n",
" 'end': 1609},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7603877,\n",
" 'index': 439,\n",
" 'word': '▁',\n",
" 'start': 1609,\n",
" 'end': 1610},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7185078,\n",
" 'index': 440,\n",
" 'word': 'rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72666425,\n",
" 'index': 441,\n",
" 'word': '▁',\n",
" 'start': 1615,\n",
" 'end': 1616},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7199019,\n",
" 'index': 442,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6944407,\n",
" 'index': 443,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7404226,\n",
" 'index': 444,\n",
" 'word': '▁but',\n",
" 'start': 1624,\n",
" 'end': 1628},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.782483,\n",
" 'index': 445,\n",
" 'word': '▁to',\n",
" 'start': 1628,\n",
" 'end': 1631},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.728395,\n",
" 'index': 446,\n",
" 'word': '▁prove',\n",
" 'start': 1631,\n",
" 'end': 1637},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7058425,\n",
" 'index': 447,\n",
" 'word': '▁them',\n",
" 'start': 1637,\n",
" 'end': 1642},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71794623,\n",
" 'index': 448,\n",
" 'word': '▁wrong',\n",
" 'start': 1642,\n",
" 'end': 1648},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7216178,\n",
" 'index': 449,\n",
" 'word': '▁on',\n",
" 'start': 1648,\n",
" 'end': 1651},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7217002,\n",
" 'index': 450,\n",
" 'word': '▁April',\n",
" 'start': 1651,\n",
" 'end': 1657},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69683576,\n",
" 'index': 451,\n",
" 'word': '▁8,',\n",
" 'start': 1657,\n",
" 'end': 1660},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6255177,\n",
" 'index': 452,\n",
" 'word': '▁2001',\n",
" 'start': 1660,\n",
" 'end': 1665},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74123895,\n",
" 'index': 453,\n",
" 'word': '▁we',\n",
" 'start': 1665,\n",
" 'end': 1668},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7217462,\n",
" 'index': 454,\n",
" 'word': '▁decide',\n",
" 'start': 1668,\n",
" 'end': 1675},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68256843,\n",
" 'index': 455,\n",
" 'word': 'd',\n",
" 'start': 1675,\n",
" 'end': 1676},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69462407,\n",
" 'index': 456,\n",
" 'word': '▁to',\n",
" 'start': 1676,\n",
" 'end': 1679},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7120118,\n",
" 'index': 457,\n",
" 'word': '▁take',\n",
" 'start': 1679,\n",
" 'end': 1684},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.727404,\n",
" 'index': 458,\n",
" 'word': '▁',\n",
" 'start': 1684,\n",
" 'end': 1685},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7168176,\n",
" 'index': 459,\n",
" 'word': 'another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6914268,\n",
" 'index': 460,\n",
" 'word': '▁picture',\n",
" 'start': 1692,\n",
" 'end': 1700},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65116674,\n",
" 'index': 461,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7633309,\n",
" 'index': 462,\n",
" 'word': '▁',\n",
" 'start': 1701,\n",
" 'end': 1702},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7356425,\n",
" 'index': 463,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7292783,\n",
" 'index': 464,\n",
" 'word': '▁sure',\n",
" 'start': 1708,\n",
" 'end': 1713},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7169685,\n",
" 'index': 465,\n",
" 'word': '▁it',\n",
" 'start': 1713,\n",
" 'end': 1716},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7440418,\n",
" 'index': 466,\n",
" 'word': '▁was',\n",
" 'start': 1716,\n",
" 'end': 1720},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69143945,\n",
" 'index': 467,\n",
" 'word': '▁',\n",
" 'start': 1720,\n",
" 'end': 1721},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7173495,\n",
" 'index': 468,\n",
" 'word': 'a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68509066,\n",
" 'index': 469,\n",
" 'word': '▁cloud',\n",
" 'start': 1722,\n",
" 'end': 1728},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66747963,\n",
" 'index': 470,\n",
" 'word': 'less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70866424,\n",
" 'index': 471,\n",
" 'word': '▁summer',\n",
" 'start': 1732,\n",
" 'end': 1739},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6921813,\n",
" 'index': 472,\n",
" 'word': '▁day',\n",
" 'start': 1739,\n",
" 'end': 1743},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71640414,\n",
" 'index': 473,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.739546,\n",
" 'index': 474,\n",
" 'word': '▁Malin',\n",
" 'start': 1744,\n",
" 'end': 1750},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7273382,\n",
" 'index': 475,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7459867,\n",
" 'index': 476,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73050207,\n",
" 'index': 477,\n",
" 'word': '▁team',\n",
" 'start': 1752,\n",
" 'end': 1757},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.735162,\n",
" 'index': 478,\n",
" 'word': '▁captur',\n",
" 'start': 1757,\n",
" 'end': 1764},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7368789,\n",
" 'index': 479,\n",
" 'word': 'ed',\n",
" 'start': 1764,\n",
" 'end': 1766},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7655242,\n",
" 'index': 480,\n",
" 'word': '▁an',\n",
" 'start': 1766,\n",
" 'end': 1769},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7452704,\n",
" 'index': 481,\n",
" 'word': '▁amazing',\n",
" 'start': 1769,\n",
" 'end': 1777},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71074563,\n",
" 'index': 482,\n",
" 'word': '▁photo',\n",
" 'start': 1777,\n",
" 'end': 1783},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7648886,\n",
" 'index': 483,\n",
" 'word': '▁',\n",
" 'start': 1783,\n",
" 'end': 1784},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7509412,\n",
" 'index': 484,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74002814,\n",
" 'index': 485,\n",
" 'word': '▁the',\n",
" 'start': 1789,\n",
" 'end': 1793},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7145964,\n",
" 'index': 486,\n",
" 'word': '▁camera',\n",
" 'start': 1793,\n",
" 'end': 1800},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69679904,\n",
" 'index': 487,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70451593,\n",
" 'index': 488,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71261543,\n",
" 'index': 489,\n",
" 'word': '▁',\n",
" 'start': 1802,\n",
" 'end': 1803},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71001965,\n",
" 'index': 490,\n",
" 'word': 'absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71867234,\n",
" 'index': 491,\n",
" 'word': '▁',\n",
" 'start': 1811,\n",
" 'end': 1812},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7250019,\n",
" 'index': 492,\n",
" 'word': 'maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71001124,\n",
" 'index': 493,\n",
" 'word': '▁revolution',\n",
" 'start': 1819,\n",
" 'end': 1830},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6948604,\n",
" 'index': 494,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8154096,\n",
" 'index': 495,\n",
" 'word': '▁With',\n",
" 'start': 1831,\n",
" 'end': 1836},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76373816,\n",
" 'index': 496,\n",
" 'word': '▁this',\n",
" 'start': 1836,\n",
" 'end': 1841},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7451351,\n",
" 'index': 497,\n",
" 'word': '▁camera',\n",
" 'start': 1841,\n",
" 'end': 1848},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.82403296,\n",
" 'index': 498,\n",
" 'word': '▁you',\n",
" 'start': 1848,\n",
" 'end': 1852},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77782404,\n",
" 'index': 499,\n",
" 'word': '▁can',\n",
" 'start': 1852,\n",
" 'end': 1856},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77488196,\n",
" 'index': 500,\n",
" 'word': '▁disc',\n",
" 'start': 1856,\n",
" 'end': 1861},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7763104,\n",
" 'index': 501,\n",
" 'word': 'er',\n",
" 'start': 1861,\n",
" 'end': 1863},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78899276,\n",
" 'index': 502,\n",
" 'word': 'n',\n",
" 'start': 1863,\n",
" 'end': 1864},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7839318,\n",
" 'index': 503,\n",
" 'word': '▁things',\n",
" 'start': 1864,\n",
" 'end': 1871},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7864886,\n",
" 'index': 504,\n",
" 'word': '▁in',\n",
" 'start': 1871,\n",
" 'end': 1874},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7598064,\n",
" 'index': 505,\n",
" 'word': '▁',\n",
" 'start': 1874,\n",
" 'end': 1875},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76907665,\n",
" 'index': 506,\n",
" 'word': 'a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.771052,\n",
" 'index': 507,\n",
" 'word': '▁digital',\n",
" 'start': 1876,\n",
" 'end': 1884},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75097096,\n",
" 'index': 508,\n",
" 'word': '▁image',\n",
" 'start': 1884,\n",
" 'end': 1890},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72820973,\n",
" 'index': 509,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.703228,\n",
" 'index': 510,\n",
" 'word': '▁3',\n",
" 'start': 1891,\n",
" 'end': 1893},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74737346,\n",
" 'index': 511,\n",
" 'word': '▁times',\n",
" 'start': 1893,\n",
" 'end': 1899},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74640083,\n",
" 'index': 512,\n",
" 'word': '▁b',\n",
" 'start': 1899,\n",
" 'end': 1901},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73074967,\n",
" 'index': 513,\n",
" 'word': 'igger',\n",
" 'start': 1901,\n",
" 'end': 1906},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74877274,\n",
" 'index': 514,\n",
" 'word': '▁than',\n",
" 'start': 1906,\n",
" 'end': 1911},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7517363,\n",
" 'index': 515,\n",
" 'word': '▁the',\n",
" 'start': 1911,\n",
" 'end': 1915},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71109414,\n",
" 'index': 516,\n",
" 'word': '▁pixel',\n",
" 'start': 1915,\n",
" 'end': 1921},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7338081,\n",
" 'index': 517,\n",
" 'word': '▁size',\n",
" 'start': 1921,\n",
" 'end': 1926},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8042635,\n",
" 'index': 518,\n",
" 'word': '▁',\n",
" 'start': 1926,\n",
" 'end': 1927},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74277866,\n",
" 'index': 519,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.765812,\n",
" 'index': 520,\n",
" 'word': '▁',\n",
" 'start': 1932,\n",
" 'end': 1933},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73706234,\n",
" 'index': 521,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7790774,\n",
" 'index': 522,\n",
" 'word': '▁if',\n",
" 'start': 1938,\n",
" 'end': 1941},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7646758,\n",
" 'index': 523,\n",
" 'word': '▁there',\n",
" 'start': 1941,\n",
" 'end': 1947},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7686542,\n",
" 'index': 524,\n",
" 'word': '▁were',\n",
" 'start': 1947,\n",
" 'end': 1952},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77635634,\n",
" 'index': 525,\n",
" 'word': '▁any',\n",
" 'start': 1952,\n",
" 'end': 1956},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77535146,\n",
" 'index': 526,\n",
" 'word': '▁',\n",
" 'start': 1956,\n",
" 'end': 1957},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7573227,\n",
" 'index': 527,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74084586,\n",
" 'index': 528,\n",
" 'word': '▁of',\n",
" 'start': 1962,\n",
" 'end': 1965},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7274466,\n",
" 'index': 529,\n",
" 'word': '▁life',\n",
" 'start': 1965,\n",
" 'end': 1970},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7055662,\n",
" 'index': 530,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74733704,\n",
" 'index': 531,\n",
" 'word': '▁you',\n",
" 'start': 1971,\n",
" 'end': 1975},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74577606,\n",
" 'index': 532,\n",
" 'word': '▁',\n",
" 'start': 1975,\n",
" 'end': 1976},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74793524,\n",
" 'index': 533,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7183822,\n",
" 'index': 534,\n",
" 'word': '▁',\n",
" 'start': 1981,\n",
" 'end': 1982},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7445473,\n",
" 'index': 535,\n",
" 'word': 'e',\n",
" 'start': 1982,\n",
" 'end': 1983},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7600949,\n",
" 'index': 536,\n",
" 'word': 'asily',\n",
" 'start': 1983,\n",
" 'end': 1988},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7463867,\n",
" 'index': 537,\n",
" 'word': '▁see',\n",
" 'start': 1988,\n",
" 'end': 1992},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7444904,\n",
" 'index': 538,\n",
" 'word': '▁what',\n",
" 'start': 1992,\n",
" 'end': 1997},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7586323,\n",
" 'index': 539,\n",
" 'word': '▁the',\n",
" 'start': 1997,\n",
" 'end': 2001},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75233936,\n",
" 'index': 540,\n",
" 'word': 'y',\n",
" 'start': 2001,\n",
" 'end': 2002},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7635048,\n",
" 'index': 541,\n",
" 'word': '▁were',\n",
" 'start': 2002,\n",
" 'end': 2007},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71720314,\n",
" 'index': 542,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7992655,\n",
" 'index': 543,\n",
" 'word': '▁What',\n",
" 'start': 2008,\n",
" 'end': 2013},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72489667,\n",
" 'index': 544,\n",
" 'word': '▁the',\n",
" 'start': 2013,\n",
" 'end': 2017},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65808624,\n",
" 'index': 545,\n",
" 'word': '▁picture',\n",
" 'start': 2017,\n",
" 'end': 2025},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69231373,\n",
" 'index': 546,\n",
" 'word': '▁show',\n",
" 'start': 2025,\n",
" 'end': 2030},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6862266,\n",
" 'index': 547,\n",
" 'word': 'ed',\n",
" 'start': 2030,\n",
" 'end': 2032},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7581496,\n",
" 'index': 548,\n",
" 'word': '▁was',\n",
" 'start': 2032,\n",
" 'end': 2036},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7601303,\n",
" 'index': 549,\n",
" 'word': '▁the',\n",
" 'start': 2036,\n",
" 'end': 2040},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72894543,\n",
" 'index': 550,\n",
" 'word': '▁but',\n",
" 'start': 2040,\n",
" 'end': 2044},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6921015,\n",
" 'index': 551,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72469836,\n",
" 'index': 552,\n",
" 'word': '▁or',\n",
" 'start': 2046,\n",
" 'end': 2049},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68593043,\n",
" 'index': 553,\n",
" 'word': '▁mesa',\n",
" 'start': 2049,\n",
" 'end': 2054},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69867843,\n",
" 'index': 554,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71468014,\n",
" 'index': 555,\n",
" 'word': '▁',\n",
" 'start': 2055,\n",
" 'end': 2056},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.75708663,\n",
" 'index': 556,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74836534,\n",
" 'index': 557,\n",
" 'word': '▁are',\n",
" 'start': 2061,\n",
" 'end': 2065},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73992217,\n",
" 'index': 558,\n",
" 'word': '▁land',\n",
" 'start': 2065,\n",
" 'end': 2070},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72869176,\n",
" 'index': 559,\n",
" 'word': 'form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7219306,\n",
" 'index': 560,\n",
" 'word': 's',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7646589,\n",
" 'index': 561,\n",
" 'word': '▁common',\n",
" 'start': 2075,\n",
" 'end': 2082},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78755254,\n",
" 'index': 562,\n",
" 'word': '▁around',\n",
" 'start': 2082,\n",
" 'end': 2089},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.81112164,\n",
" 'index': 563,\n",
" 'word': '▁the',\n",
" 'start': 2089,\n",
" 'end': 2093},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.764886,\n",
" 'index': 564,\n",
" 'word': '▁American',\n",
" 'start': 2093,\n",
" 'end': 2102},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7145308,\n",
" 'index': 565,\n",
" 'word': '▁West',\n",
" 'start': 2102,\n",
" 'end': 2107},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6772878,\n",
" 'index': 566,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Token-classification run of HiTZ/mdeberta-expl-extraction-multi over `text`.\n",
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Tokenizer and model are both loaded from the same checkpoint identifier.\n",
"tokenizer = AutoTokenizer.from_pretrained(\n",
"    \"HiTZ/mdeberta-expl-extraction-multi\"\n",
")\n",
"model = AutoModelForTokenClassification.from_pretrained(\n",
"    \"HiTZ/mdeberta-expl-extraction-multi\"\n",
")\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay defined near the top of the notebook. The result is a list of\n",
"# dicts (entity, score, index, word, start, end), displayed as the cell output.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "6d68cccb-bc9e-415c-babb-3dd2b2ff72cb",
"metadata": {},
"source": [
"## 47 rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "34fc2413-4e56-430f-bb99-16869761b1c7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'B-LOC',\n",
" 'score': 0.9935009,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.9510317,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.96693367,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.7514709,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'B-DAT',\n",
" 'score': 0.9943066,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.99545693,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.9937779,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.99656725,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': 'B-DAT',\n",
" 'score': 0.9939989,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.99549806,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.994966,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'I-DAT',\n",
" 'score': 0.99757344,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'B-LOC',\n",
" 'score': 0.77366734,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'I-LOC',\n",
" 'score': 0.5598569,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107}]"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Token-classification run of rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation over `text`.\n",
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Tokenizer and model are both loaded from the same checkpoint identifier.\n",
"tokenizer = AutoTokenizer.from_pretrained(\n",
"    \"rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation\"\n",
")\n",
"model = AutoModelForTokenClassification.from_pretrained(\n",
"    \"rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation\"\n",
")\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay defined near the top of the notebook. The result is a list of\n",
"# dicts (entity, score, index, word, start, end), displayed as the cell output.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "f5ae9b8f-c5ff-40ba-9956-1aa511d14d6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-DAT 2\n",
"B-LOC 3\n",
"I-DAT 6\n",
"I-LOC 3\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-DAT ▁April 2\n",
"B-LOC ▁American 1\n",
" ▁Cy 1\n",
" ▁Mars 1\n",
"I-DAT , 2\n",
" ▁1998. 1\n",
" ▁2001 1\n",
" ▁5 1\n",
" ▁8 1\n",
"I-LOC do 1\n",
" nia 1\n",
" ▁West 1\n",
"dtype: int64"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the JSON file named after model 47 and tabulate its entities.\n",
"# `json` and `obtener_dataframe` come from earlier cells of this notebook.\n",
"with open(\"47 rollerhafeezh-amikomxlm-roberta-base-ner-augmentation.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Row count per entity label goes to stdout; the per-(label, word) count is the\n",
"# final expression and therefore the cell's displayed result.\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "1eebdd71-4bb0-4c49-af4d-10343c9e273e",
"metadata": {},
"source": [
"## 48 brettlin/distilbert-base-uncased-finetuned-ner"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "39770965-3574-47f2-ad55-44bb35f11864",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'LABEL_0',\n",
" 'score': 0.9996531,\n",
" 'index': 1,\n",
" 'word': 'so',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995907,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99943215,\n",
" 'index': 3,\n",
" 'word': 'if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986737,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979754,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.995782,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98806703,\n",
" 'index': 7,\n",
" 'word': 'a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.60142565,\n",
" 'index': 8,\n",
" 'word': 'nasa',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'LABEL_75',\n",
" 'score': 0.74799323,\n",
" 'index': 9,\n",
" 'word': 'scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99963987,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99952126,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999762,\n",
" 'index': 12,\n",
" 'word': 'should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995994,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995302,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99958926,\n",
" 'index': 15,\n",
" 'word': 'to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99921143,\n",
" 'index': 16,\n",
" 'word': 'tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99929273,\n",
" 'index': 17,\n",
" 'word': 'me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99949706,\n",
" 'index': 18,\n",
" 'word': 'the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99955744,\n",
" 'index': 19,\n",
" 'word': 'whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99942434,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99973875,\n",
" 'index': 21,\n",
" 'word': 'about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998621,\n",
" 'index': 22,\n",
" 'word': 'the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997615,\n",
" 'index': 23,\n",
" 'word': 'face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995735,\n",
" 'index': 24,\n",
" 'word': 'on',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98947805,\n",
" 'index': 25,\n",
" 'word': 'mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99989235,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999428,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999577,\n",
" 'index': 28,\n",
" 'word': 'obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999269,\n",
" 'index': 29,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999874,\n",
" 'index': 30,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998374,\n",
" 'index': 31,\n",
" 'word': 'that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997681,\n",
" 'index': 32,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99981254,\n",
" 'index': 33,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99965465,\n",
" 'index': 34,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990865,\n",
" 'index': 35,\n",
" 'word': 'on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97573996,\n",
" 'index': 36,\n",
" 'word': 'mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998772,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999892,\n",
" 'index': 38,\n",
" 'word': 'and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999051,\n",
" 'index': 39,\n",
" 'word': 'that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99995136,\n",
" 'index': 40,\n",
" 'word': 'the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99992394,\n",
" 'index': 41,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999279,\n",
" 'index': 42,\n",
" 'word': 'was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999905,\n",
" 'index': 43,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991283,\n",
" 'index': 44,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.89696616,\n",
" 'index': 45,\n",
" 'word': 'aliens',\n",
" 'start': 191,\n",
" 'end': 197},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99989593,\n",
" 'index': 46,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995895,\n",
" 'index': 47,\n",
" 'word': 'correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99978584,\n",
" 'index': 48,\n",
" 'word': '?',\n",
" 'start': 206,\n",
" 'end': 207},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99943024,\n",
" 'index': 49,\n",
" 'word': '\"',\n",
" 'start': 207,\n",
" 'end': 208},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99595106,\n",
" 'index': 50,\n",
" 'word': 'no',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99868613,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9463645,\n",
" 'index': 52,\n",
" 'word': 'twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.76300526,\n",
" 'index': 53,\n",
" 'word': 'five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9725427,\n",
" 'index': 54,\n",
" 'word': 'years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99003905,\n",
" 'index': 55,\n",
" 'word': 'ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984659,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8449111,\n",
" 'index': 57,\n",
" 'word': 'our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.54668593,\n",
" 'index': 58,\n",
" 'word': 'viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.71119046,\n",
" 'index': 59,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9257255,\n",
" 'index': 60,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979443,\n",
" 'index': 61,\n",
" 'word': 'was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99194145,\n",
" 'index': 62,\n",
" 'word': 'circling',\n",
" 'start': 264,\n",
" 'end': 272},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9151495,\n",
" 'index': 63,\n",
" 'word': 'the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9498145,\n",
" 'index': 64,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9979522,\n",
" 'index': 65,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9976399,\n",
" 'index': 66,\n",
" 'word': 'snapping',\n",
" 'start': 285,\n",
" 'end': 293},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9884976,\n",
" 'index': 67,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99981576,\n",
" 'index': 68,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99983835,\n",
" 'index': 69,\n",
" 'word': 'when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996313,\n",
" 'index': 70,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99968636,\n",
" 'index': 71,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996791,\n",
" 'index': 72,\n",
" 'word': 'the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99974054,\n",
" 'index': 73,\n",
" 'word': 'shadowy',\n",
" 'start': 322,\n",
" 'end': 329},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99973494,\n",
" 'index': 74,\n",
" 'word': 'likeness',\n",
" 'start': 330,\n",
" 'end': 338},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996382,\n",
" 'index': 75,\n",
" 'word': 'of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9991806,\n",
" 'index': 76,\n",
" 'word': 'a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9973673,\n",
" 'index': 77,\n",
" 'word': 'human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.997843,\n",
" 'index': 78,\n",
" 'word': 'face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99921536,\n",
" 'index': 79,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981499,\n",
" 'index': 80,\n",
" 'word': 'us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99827325,\n",
" 'index': 81,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99968076,\n",
" 'index': 82,\n",
" 'word': 'figured',\n",
" 'start': 370,\n",
" 'end': 377},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997644,\n",
" 'index': 83,\n",
" 'word': 'out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99987495,\n",
" 'index': 84,\n",
" 'word': 'that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998528,\n",
" 'index': 85,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998704,\n",
" 'index': 86,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998265,\n",
" 'index': 87,\n",
" 'word': 'just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99902964,\n",
" 'index': 88,\n",
" 'word': 'another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982116,\n",
" 'index': 89,\n",
" 'word': 'martian',\n",
" 'start': 407,\n",
" 'end': 414},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99879277,\n",
" 'index': 90,\n",
" 'word': 'mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99952435,\n",
" 'index': 91,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99871767,\n",
" 'index': 92,\n",
" 'word': 'common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99144477,\n",
" 'index': 93,\n",
" 'word': 'around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'LABEL_20',\n",
" 'score': 0.7289493,\n",
" 'index': 94,\n",
" 'word': 'cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'LABEL_20',\n",
" 'score': 0.6414867,\n",
" 'index': 95,\n",
" 'word': '##don',\n",
" 'start': 437,\n",
" 'end': 440},\n",
" {'entity': 'LABEL_20',\n",
" 'score': 0.74183536,\n",
" 'index': 96,\n",
" 'word': '##ia',\n",
" 'start': 440,\n",
" 'end': 442},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998971,\n",
" 'index': 97,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99993026,\n",
" 'index': 98,\n",
" 'word': 'only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999012,\n",
" 'index': 99,\n",
" 'word': 'this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998801,\n",
" 'index': 100,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999132,\n",
" 'index': 101,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99989045,\n",
" 'index': 102,\n",
" 'word': 'shadows',\n",
" 'start': 462,\n",
" 'end': 469},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997019,\n",
" 'index': 103,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99966216,\n",
" 'index': 104,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997868,\n",
" 'index': 105,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999015,\n",
" 'index': 106,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.993024,\n",
" 'index': 107,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8148152,\n",
" 'index': 108,\n",
" 'word': 'an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'LABEL_73',\n",
" 'score': 0.92523,\n",
" 'index': 109,\n",
" 'word': 'egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'LABEL_74',\n",
" 'score': 0.6341769,\n",
" 'index': 110,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'LABEL_75',\n",
" 'score': 0.9717007,\n",
" 'index': 111,\n",
" 'word': 'pharaoh',\n",
" 'start': 505,\n",
" 'end': 512},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.79483175,\n",
" 'index': 112,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997398,\n",
" 'index': 113,\n",
" 'word': 'very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993025,\n",
" 'index': 114,\n",
" 'word': 'few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99925786,\n",
" 'index': 115,\n",
" 'word': 'days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99947757,\n",
" 'index': 116,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99968624,\n",
" 'index': 117,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99973804,\n",
" 'index': 118,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99975663,\n",
" 'index': 119,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99991214,\n",
" 'index': 120,\n",
" 'word': 'the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99990284,\n",
" 'index': 121,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.997297,\n",
" 'index': 122,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9489387,\n",
" 'index': 123,\n",
" 'word': 'all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.90861577,\n",
" 'index': 124,\n",
" 'word': 'to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99558485,\n",
" 'index': 125,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99955446,\n",
" 'index': 126,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995072,\n",
" 'index': 127,\n",
" 'word': 'and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996469,\n",
" 'index': 128,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996728,\n",
" 'index': 129,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99978834,\n",
" 'index': 130,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996469,\n",
" 'index': 131,\n",
" 'word': 'to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998093,\n",
" 'index': 132,\n",
" 'word': 'note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99986935,\n",
" 'index': 133,\n",
" 'word': 'that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996062,\n",
" 'index': 134,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997197,\n",
" 'index': 135,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986713,\n",
" 'index': 136,\n",
" 'word': 'a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99810624,\n",
" 'index': 137,\n",
" 'word': 'huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986929,\n",
" 'index': 138,\n",
" 'word': 'rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9953111,\n",
" 'index': 139,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994229,\n",
" 'index': 140,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998062,\n",
" 'index': 141,\n",
" 'word': 'just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986547,\n",
" 'index': 142,\n",
" 'word': 'resembled',\n",
" 'start': 642,\n",
" 'end': 651},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9972566,\n",
" 'index': 143,\n",
" 'word': 'a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99356645,\n",
" 'index': 144,\n",
" 'word': 'human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99711275,\n",
" 'index': 145,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98437935,\n",
" 'index': 146,\n",
" 'word': 'and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9968284,\n",
" 'index': 147,\n",
" 'word': 'face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99973816,\n",
" 'index': 148,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998491,\n",
" 'index': 149,\n",
" 'word': 'but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999168,\n",
" 'index': 150,\n",
" 'word': 'all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99989974,\n",
" 'index': 151,\n",
" 'word': 'of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998741,\n",
" 'index': 152,\n",
" 'word': 'it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999095,\n",
" 'index': 153,\n",
" 'word': 'was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998404,\n",
" 'index': 154,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99984217,\n",
" 'index': 155,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990085,\n",
" 'index': 156,\n",
" 'word': 'shadows',\n",
" 'start': 703,\n",
" 'end': 710},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99881077,\n",
" 'index': 157,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996306,\n",
" 'index': 158,\n",
" 'word': 'we',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99975604,\n",
" 'index': 159,\n",
" 'word': 'only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996767,\n",
" 'index': 160,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999858,\n",
" 'index': 161,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996333,\n",
" 'index': 162,\n",
" 'word': 'because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987091,\n",
" 'index': 163,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9981231,\n",
" 'index': 164,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9989918,\n",
" 'index': 165,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983368,\n",
" 'index': 166,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99723864,\n",
" 'index': 167,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9965023,\n",
" 'index': 168,\n",
" 'word': 'a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9949131,\n",
" 'index': 169,\n",
" 'word': 'good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99544805,\n",
" 'index': 170,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9924279,\n",
" 'index': 171,\n",
" 'word': 'to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9778621,\n",
" 'index': 172,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9591297,\n",
" 'index': 173,\n",
" 'word': 'the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9785558,\n",
" 'index': 174,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9795846,\n",
" 'index': 175,\n",
" 'word': 'with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99311686,\n",
" 'index': 176,\n",
" 'word': 'nasa',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99412537,\n",
" 'index': 177,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9853887,\n",
" 'index': 178,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98601574,\n",
" 'index': 179,\n",
" 'word': 'findings',\n",
" 'start': 808,\n",
" 'end': 816},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99843234,\n",
" 'index': 180,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9949004,\n",
" 'index': 181,\n",
" 'word': 'and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.990635,\n",
" 'index': 182,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98665386,\n",
" 'index': 183,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99445266,\n",
" 'index': 184,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9913902,\n",
" 'index': 185,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99121255,\n",
" 'index': 186,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982174,\n",
" 'index': 187,\n",
" 'word': 'mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9925493,\n",
" 'index': 188,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9925673,\n",
" 'index': 189,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99865615,\n",
" 'index': 190,\n",
" 'word': 'and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99926656,\n",
" 'index': 191,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9956815,\n",
" 'index': 192,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9459845,\n",
" 'index': 193,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999012,\n",
" 'index': 194,\n",
" 'word': 'the',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998677,\n",
" 'index': 195,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99969125,\n",
" 'index': 196,\n",
" 'word': 'on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99862087,\n",
" 'index': 197,\n",
" 'word': 'mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99946374,\n",
" 'index': 198,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99820757,\n",
" 'index': 199,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9943071,\n",
" 'index': 200,\n",
" 'word': 'a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9949462,\n",
" 'index': 201,\n",
" 'word': 'pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.996988,\n",
" 'index': 202,\n",
" 'word': 'icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99945325,\n",
" 'index': 203,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9961422,\n",
" 'index': 204,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9605378,\n",
" 'index': 205,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9329523,\n",
" 'index': 206,\n",
" 'word': 'movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99609864,\n",
" 'index': 207,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9907041,\n",
" 'index': 208,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.95241463,\n",
" 'index': 209,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.86189944,\n",
" 'index': 210,\n",
" 'word': 'books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8810842,\n",
" 'index': 211,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7707626,\n",
" 'index': 212,\n",
" 'word': 'magazines',\n",
" 'start': 939,\n",
" 'end': 948},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9275165,\n",
" 'index': 213,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'LABEL_73',\n",
" 'score': 0.4515065,\n",
" 'index': 214,\n",
" 'word': 'radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'LABEL_74',\n",
" 'score': 0.871169,\n",
" 'index': 215,\n",
" 'word': 'talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'LABEL_75',\n",
" 'score': 0.7055705,\n",
" 'index': 216,\n",
" 'word': 'shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.95585626,\n",
" 'index': 217,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.76473933,\n",
" 'index': 218,\n",
" 'word': 'and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.3433743,\n",
" 'index': 219,\n",
" 'word': 'haunted',\n",
" 'start': 972,\n",
" 'end': 979},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.6025852,\n",
" 'index': 220,\n",
" 'word': 'grocery',\n",
" 'start': 980,\n",
" 'end': 987},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.50706923,\n",
" 'index': 221,\n",
" 'word': 'store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.37388822,\n",
" 'index': 222,\n",
" 'word': 'check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.5225139,\n",
" 'index': 223,\n",
" 'word': '##out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'LABEL_75',\n",
" 'score': 0.5667381,\n",
" 'index': 224,\n",
" 'word': 'lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9266954,\n",
" 'index': 225,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9151958,\n",
" 'index': 226,\n",
" 'word': '25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.93549967,\n",
" 'index': 227,\n",
" 'word': 'years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8062772,\n",
" 'index': 228,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9965282,\n",
" 'index': 229,\n",
" 'word': 'some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99480104,\n",
" 'index': 230,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9963915,\n",
" 'index': 231,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99992347,\n",
" 'index': 232,\n",
" 'word': 'the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99991715,\n",
" 'index': 233,\n",
" 'word': 'natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99994695,\n",
" 'index': 234,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99992645,\n",
" 'index': 235,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99985206,\n",
" 'index': 236,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99985313,\n",
" 'index': 237,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997029,\n",
" 'index': 238,\n",
" 'word': 'of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996062,\n",
" 'index': 239,\n",
" 'word': 'life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99725705,\n",
" 'index': 240,\n",
" 'word': 'on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.94723123,\n",
" 'index': 241,\n",
" 'word': 'mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994099,\n",
" 'index': 242,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.997752,\n",
" 'index': 243,\n",
" 'word': 'and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99548113,\n",
" 'index': 244,\n",
" 'word': 'that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9863447,\n",
" 'index': 245,\n",
" 'word': 'us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9810527,\n",
" 'index': 246,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99548376,\n",
" 'index': 247,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.996734,\n",
" 'index': 248,\n",
" 'word': 'to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9977385,\n",
" 'index': 249,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983668,\n",
" 'index': 250,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99666613,\n",
" 'index': 251,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986473,\n",
" 'index': 252,\n",
" 'word': 'but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9985128,\n",
" 'index': 253,\n",
" 'word': 'really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99492836,\n",
" 'index': 254,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8159351,\n",
" 'index': 255,\n",
" 'word': 'the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.65608,\n",
" 'index': 256,\n",
" 'word': 'defenders',\n",
" 'start': 1152,\n",
" 'end': 1161},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5334063,\n",
" 'index': 257,\n",
" 'word': 'of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.36823764,\n",
" 'index': 258,\n",
" 'word': 'the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.7509814,\n",
" 'index': 259,\n",
" 'word': 'nasa',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.6963956,\n",
" 'index': 260,\n",
" 'word': 'budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.85757244,\n",
" 'index': 261,\n",
" 'word': 'wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98319745,\n",
" 'index': 262,\n",
" 'word': 'there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9912368,\n",
" 'index': 263,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99493057,\n",
" 'index': 264,\n",
" 'word': 'ancient',\n",
" 'start': 1196,\n",
" 'end': 1203},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98931223,\n",
" 'index': 265,\n",
" 'word': 'civilization',\n",
" 'start': 1204,\n",
" 'end': 1216},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98382854,\n",
" 'index': 266,\n",
" 'word': 'on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96409446,\n",
" 'index': 267,\n",
" 'word': 'mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99208647,\n",
" 'index': 268,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994597,\n",
" 'index': 269,\n",
" 'word': 'we',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99947494,\n",
" 'index': 270,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994407,\n",
" 'index': 271,\n",
" 'word': 'to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9995198,\n",
" 'index': 272,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999316,\n",
" 'index': 273,\n",
" 'word': 'another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993339,\n",
" 'index': 274,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993856,\n",
" 'index': 275,\n",
" 'word': 'just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9983365,\n",
" 'index': 276,\n",
" 'word': 'to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9946636,\n",
" 'index': 277,\n",
" 'word': 'make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9865796,\n",
" 'index': 278,\n",
" 'word': 'sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9879188,\n",
" 'index': 279,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.93629557,\n",
" 'index': 280,\n",
" 'word': 'weren',\n",
" 'start': 1279,\n",
" 'end': 1284},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.81539416,\n",
" 'index': 281,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9333922,\n",
" 'index': 282,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9301563,\n",
" 'index': 283,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9978479,\n",
" 'index': 284,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97111917,\n",
" 'index': 285,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.9933061,\n",
" 'index': 286,\n",
" 'word': 'april',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.99215525,\n",
" 'index': 287,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7865959,\n",
" 'index': 288,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.98310995,\n",
" 'index': 289,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.86976135,\n",
" 'index': 290,\n",
" 'word': '.',\n",
" 'start': 1310,\n",
" 'end': 1311},\n",
" {'entity': 'LABEL_5',\n",
" 'score': 0.70154727,\n",
" 'index': 291,\n",
" 'word': 'michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'LABEL_7',\n",
" 'score': 0.97676635,\n",
" 'index': 292,\n",
" 'word': 'mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'LABEL_7',\n",
" 'score': 0.96749806,\n",
" 'index': 293,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.978467,\n",
" 'index': 294,\n",
" 'word': 'and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8586074,\n",
" 'index': 295,\n",
" 'word': 'his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'LABEL_14',\n",
" 'score': 0.525903,\n",
" 'index': 296,\n",
" 'word': 'mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'LABEL_74',\n",
" 'score': 0.3780676,\n",
" 'index': 297,\n",
" 'word': 'orbit',\n",
" 'start': 1339,\n",
" 'end': 1344},\n",
" {'entity': 'LABEL_74',\n",
" 'score': 0.4745661,\n",
" 'index': 298,\n",
" 'word': '##er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9404595,\n",
" 'index': 299,\n",
" 'word': 'camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9960602,\n",
" 'index': 300,\n",
" 'word': 'team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99968743,\n",
" 'index': 301,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99965096,\n",
" 'index': 302,\n",
" 'word': 'a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99950874,\n",
" 'index': 303,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994449,\n",
" 'index': 304,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99869066,\n",
" 'index': 305,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.977216,\n",
" 'index': 306,\n",
" 'word': 'ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99482286,\n",
" 'index': 307,\n",
" 'word': 'times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.994201,\n",
" 'index': 308,\n",
" 'word': 'sharpe',\n",
" 'start': 1393,\n",
" 'end': 1399},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9958853,\n",
" 'index': 309,\n",
" 'word': '##r',\n",
" 'start': 1399,\n",
" 'end': 1400},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9956436,\n",
" 'index': 310,\n",
" 'word': 'than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9973666,\n",
" 'index': 311,\n",
" 'word': 'the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9975478,\n",
" 'index': 312,\n",
" 'word': 'original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9565762,\n",
" 'index': 313,\n",
" 'word': 'viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99581724,\n",
" 'index': 314,\n",
" 'word': 'photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998056,\n",
" 'index': 315,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99983275,\n",
" 'index': 316,\n",
" 'word': 'revealing',\n",
" 'start': 1434,\n",
" 'end': 1443},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998454,\n",
" 'index': 317,\n",
" 'word': 'a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998418,\n",
" 'index': 318,\n",
" 'word': 'natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99992263,\n",
" 'index': 319,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998939,\n",
" 'index': 320,\n",
" 'word': '##form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99988616,\n",
" 'index': 321,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99994576,\n",
" 'index': 322,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998746,\n",
" 'index': 323,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999754,\n",
" 'index': 324,\n",
" 'word': 'no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9965797,\n",
" 'index': 325,\n",
" 'word': 'alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9921788,\n",
" 'index': 326,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99780697,\n",
" 'index': 327,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99951005,\n",
" 'index': 328,\n",
" 'word': '\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996767,\n",
" 'index': 329,\n",
" 'word': 'but',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9999068,\n",
" 'index': 330,\n",
" 'word': 'that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99991024,\n",
" 'index': 331,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99979264,\n",
" 'index': 332,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99916065,\n",
" 'index': 333,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99970406,\n",
" 'index': 334,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99962485,\n",
" 'index': 335,\n",
" 'word': 'very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99969125,\n",
" 'index': 336,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9984927,\n",
" 'index': 337,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9987503,\n",
" 'index': 338,\n",
" 'word': 'all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999587,\n",
" 'index': 339,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99991965,\n",
" 'index': 340,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99991965,\n",
" 'index': 341,\n",
" 'word': 'could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998634,\n",
" 'index': 342,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99986553,\n",
" 'index': 343,\n",
" 'word': 'alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99974054,\n",
" 'index': 344,\n",
" 'word': 'markings',\n",
" 'start': 1562,\n",
" 'end': 1570},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996536,\n",
" 'index': 345,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99719834,\n",
" 'index': 346,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992161,\n",
" 'index': 347,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99676174,\n",
" 'index': 348,\n",
" 'word': 'haze',\n",
" 'start': 1586,\n",
" 'end': 1590},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996555,\n",
" 'index': 349,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9827891,\n",
" 'index': 350,\n",
" 'word': 'well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96698594,\n",
" 'index': 351,\n",
" 'word': 'no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9919774,\n",
" 'index': 352,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9584662,\n",
" 'index': 353,\n",
" 'word': 'yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99590755,\n",
" 'index': 354,\n",
" 'word': 'that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9936069,\n",
" 'index': 355,\n",
" 'word': 'rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.97964877,\n",
" 'index': 356,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.96804506,\n",
" 'index': 357,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9974667,\n",
" 'index': 358,\n",
" 'word': 'but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99407756,\n",
" 'index': 359,\n",
" 'word': 'to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99163276,\n",
" 'index': 360,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9937564,\n",
" 'index': 361,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9836573,\n",
" 'index': 362,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98289907,\n",
" 'index': 363,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.99786407,\n",
" 'index': 364,\n",
" 'word': 'april',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.99625564,\n",
" 'index': 365,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.65599644,\n",
" 'index': 366,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'LABEL_24',\n",
" 'score': 0.9716897,\n",
" 'index': 367,\n",
" 'word': '2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993957,\n",
" 'index': 368,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994825,\n",
" 'index': 369,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994381,\n",
" 'index': 370,\n",
" 'word': 'to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996543,\n",
" 'index': 371,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99964786,\n",
" 'index': 372,\n",
" 'word': 'another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.999567,\n",
" 'index': 373,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99953973,\n",
" 'index': 374,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99928975,\n",
" 'index': 375,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992249,\n",
" 'index': 376,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.993779,\n",
" 'index': 377,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9736397,\n",
" 'index': 378,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7797392,\n",
" 'index': 379,\n",
" 'word': 'a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.29156896,\n",
" 'index': 380,\n",
" 'word': 'cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.40196428,\n",
" 'index': 381,\n",
" 'word': '##less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'LABEL_10',\n",
" 'score': 0.2967023,\n",
" 'index': 382,\n",
" 'word': 'summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5588353,\n",
" 'index': 383,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99896514,\n",
" 'index': 384,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.995736,\n",
" 'index': 385,\n",
" 'word': 'mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.977207,\n",
" 'index': 386,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99926835,\n",
" 'index': 387,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99910814,\n",
" 'index': 388,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99894994,\n",
" 'index': 389,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99952054,\n",
" 'index': 390,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99940646,\n",
" 'index': 391,\n",
" 'word': 'an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992894,\n",
" 'index': 392,\n",
" 'word': 'amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99922264,\n",
" 'index': 393,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99742824,\n",
" 'index': 394,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99749315,\n",
" 'index': 395,\n",
" 'word': 'the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9970276,\n",
" 'index': 396,\n",
" 'word': 'camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.993164,\n",
" 'index': 397,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9909326,\n",
" 'index': 398,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9945222,\n",
" 'index': 399,\n",
" 'word': 'absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99423146,\n",
" 'index': 400,\n",
" 'word': 'maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.98917,\n",
" 'index': 401,\n",
" 'word': 'revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.8007525,\n",
" 'index': 402,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9982333,\n",
" 'index': 403,\n",
" 'word': 'with',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99956816,\n",
" 'index': 404,\n",
" 'word': 'this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997185,\n",
" 'index': 405,\n",
" 'word': 'camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99758506,\n",
" 'index': 406,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9970925,\n",
" 'index': 407,\n",
" 'word': 'can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99829715,\n",
" 'index': 408,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9970024,\n",
" 'index': 409,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99906355,\n",
" 'index': 410,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99950504,\n",
" 'index': 411,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99918514,\n",
" 'index': 412,\n",
" 'word': 'a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99943334,\n",
" 'index': 413,\n",
" 'word': 'digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996412,\n",
" 'index': 414,\n",
" 'word': 'image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993812,\n",
" 'index': 415,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99675,\n",
" 'index': 416,\n",
" 'word': '3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99862754,\n",
" 'index': 417,\n",
" 'word': 'times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99918944,\n",
" 'index': 418,\n",
" 'word': 'bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990243,\n",
" 'index': 419,\n",
" 'word': 'than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996014,\n",
" 'index': 420,\n",
" 'word': 'the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998192,\n",
" 'index': 421,\n",
" 'word': 'pixel',\n",
" 'start': 1916,\n",
" 'end': 1921},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99965644,\n",
" 'index': 422,\n",
" 'word': 'size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998871,\n",
" 'index': 423,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996221,\n",
" 'index': 424,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9996413,\n",
" 'index': 425,\n",
" 'word': 'if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9990324,\n",
" 'index': 426,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9986166,\n",
" 'index': 427,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99817145,\n",
" 'index': 428,\n",
" 'word': 'any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99731725,\n",
" 'index': 429,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99823487,\n",
" 'index': 430,\n",
" 'word': 'of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99668664,\n",
" 'index': 431,\n",
" 'word': 'life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9992847,\n",
" 'index': 432,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.995812,\n",
" 'index': 433,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99794585,\n",
" 'index': 434,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9973961,\n",
" 'index': 435,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9948614,\n",
" 'index': 436,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99854785,\n",
" 'index': 437,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99702233,\n",
" 'index': 438,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9966197,\n",
" 'index': 439,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99884593,\n",
" 'index': 440,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9997309,\n",
" 'index': 441,\n",
" 'word': 'what',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9998198,\n",
" 'index': 442,\n",
" 'word': 'the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99978286,\n",
" 'index': 443,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9994942,\n",
" 'index': 444,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9993857,\n",
" 'index': 445,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.64063627,\n",
" 'index': 446,\n",
" 'word': 'the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'LABEL_10',\n",
" 'score': 0.68473804,\n",
" 'index': 447,\n",
" 'word': 'butte',\n",
" 'start': 2041,\n",
" 'end': 2046},\n",
" {'entity': 'LABEL_10',\n",
" 'score': 0.7311275,\n",
" 'index': 448,\n",
" 'word': 'or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.47081882,\n",
" 'index': 449,\n",
" 'word': 'mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.9972504,\n",
" 'index': 450,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99924064,\n",
" 'index': 451,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99846965,\n",
" 'index': 452,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99877447,\n",
" 'index': 453,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99923384,\n",
" 'index': 454,\n",
" 'word': '##forms',\n",
" 'start': 2070,\n",
" 'end': 2075},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99003917,\n",
" 'index': 455,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.907271,\n",
" 'index': 456,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.33334938,\n",
" 'index': 457,\n",
" 'word': 'the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'LABEL_10',\n",
" 'score': 0.66397417,\n",
" 'index': 458,\n",
" 'word': 'american',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'LABEL_11',\n",
" 'score': 0.86412615,\n",
" 'index': 459,\n",
" 'word': 'west',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.99960643,\n",
" 'index': 460,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Token-level NER over `text` using a fine-tuned DistilBERT checkpoint.\n",
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Single source of truth for the checkpoint, so the tokenizer and the model\n",
"# are always loaded from the same repository.\n",
"NER_CHECKPOINT = \"brettlin/distilbert-base-uncased-finetuned-ner\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(NER_CHECKPOINT)\n",
"model = AutoModelForTokenClassification.from_pretrained(NER_CHECKPOINT)\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is defined in an earlier cell. The result is a list with one dict per\n",
"# word-piece token, with keys: entity, score, index, word, start, end.\n",
"ner_results = nlp(text)\n",
"\n",
"# Display only a preview: the full list has hundreds of entries and bloats the\n",
"# saved notebook. `ner_results` still holds every token for later cells.\n",
"print(f\"{len(ner_results)} tokens tagged\")\n",
"ner_results[:10]"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "888f3316-8453-418b-be9a-266787210929",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"LABEL_0 423\n",
"LABEL_10 4\n",
"LABEL_11 1\n",
"LABEL_14 9\n",
"LABEL_20 3\n",
"LABEL_24 7\n",
"LABEL_5 1\n",
"LABEL_7 2\n",
"LABEL_73 2\n",
"LABEL_74 4\n",
"LABEL_75 4\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"LABEL_0 9\n",
" ##ct 1\n",
" ##ern 1\n",
" ##form 2\n",
" ##forms 1\n",
" ##less 1\n",
" ##n 1\n",
" ##r 1\n",
" ##rra 1\n",
" , 33\n",
" - 2\n",
" . 12\n",
" 25 1\n",
" 3 1\n",
" ; 1\n",
" ? 1\n",
" a 10\n",
" able 1\n",
" about 1\n",
" absolute 1\n",
" ago 1\n",
" alien 2\n",
" aliens 1\n",
" all 3\n",
" amazing 1\n",
" an 2\n",
" ancient 1\n",
" and 8\n",
" announced 1\n",
" another 3\n",
" any 1\n",
" appeared 1\n",
" are 1\n",
" around 2\n",
" at 2\n",
" attention 1\n",
" be 2\n",
" became 1\n",
" because 1\n",
" bigger 1\n",
" books 1\n",
" but 4\n",
" by 3\n",
" camera 3\n",
" can 1\n",
" captured 1\n",
" circling 1\n",
" civilization 1\n",
" clear 1\n",
" cloud 1\n",
" common 2\n",
" correct 1\n",
" could 2\n",
" created 1\n",
" day 1\n",
" days 1\n",
" decided 2\n",
" defenders 1\n",
" did 1\n",
" digital 1\n",
" disc 1\n",
" easily 1\n",
" engage 1\n",
" evidence 2\n",
" face 5\n",
" few 1\n",
" figured 1\n",
" findings 1\n",
" five 1\n",
" for 2\n",
" formation 1\n",
" formed 1\n",
" good 1\n",
" had 1\n",
" haze 1\n",
" head 1\n",
" hidden 1\n",
" hide 1\n",
" his 1\n",
" huge 1\n",
" human 2\n",
" icon 1\n",
" if 2\n",
" image 2\n",
" in 3\n",
" is 2\n",
" it 10\n",
" just 3\n",
" land 3\n",
" later 1\n",
" life 3\n",
" like 1\n",
" likeness 1\n",
" look 1\n",
" made 2\n",
" magazines 1\n",
" make 1\n",
" making 1\n",
" mali 1\n",
" markings 1\n",
" mars 6\n",
" martian 1\n",
" maximum 1\n",
" me 1\n",
" mean 1\n",
" means 1\n",
" meant 1\n",
" mesa 2\n",
" monument 1\n",
" movies 1\n",
" nasa 2\n",
" natural 2\n",
" no 3\n",
" note 1\n",
" obviously 1\n",
" of 5\n",
" on 7\n",
" one 1\n",
" only 2\n",
" original 1\n",
" our 1\n",
" out 1\n",
" people 1\n",
" photo 1\n",
" photos 2\n",
" picture 4\n",
" pixel 1\n",
" planet 1\n",
" pop 1\n",
" prove 1\n",
" public 1\n",
" re 1\n",
" really 1\n",
" resembled 1\n",
" revealed 1\n",
" revealing 1\n",
" revolution 1\n",
" rock 1\n",
" rumor 1\n",
" s 3\n",
" scientists 2\n",
" see 2\n",
" shadows 2\n",
" shadowy 1\n",
" sharpe 1\n",
" shot 2\n",
" should 1\n",
" showed 1\n",
" signs 1\n",
" size 1\n",
" snapping 1\n",
" so 1\n",
" some 1\n",
" soon 1\n",
" spacecraft 1\n",
" spotted 1\n",
" started 1\n",
" story 1\n",
" sure 3\n",
" t 2\n",
" take 2\n",
" team 2\n",
" tell 1\n",
" ten 1\n",
" than 2\n",
" that 10\n",
" the 17\n",
" them 1\n",
" there 3\n",
" they 1\n",
" things 1\n",
" this 2\n",
" thought 2\n",
" times 2\n",
" to 10\n",
" took 1\n",
" twenty 1\n",
" us 2\n",
" using 1\n",
" very 2\n",
" viking 2\n",
" wanted 1\n",
" was 10\n",
" wasn 1\n",
" way 1\n",
" we 7\n",
" well 1\n",
" were 3\n",
" weren 1\n",
" what 2\n",
" when 1\n",
" which 5\n",
" whole 1\n",
" wish 1\n",
" with 2\n",
" would 1\n",
" wrong 2\n",
" years 2\n",
" yes 1\n",
" you 4\n",
"LABEL_10 american 1\n",
" butte 1\n",
" or 1\n",
" summer 1\n",
"LABEL_11 west 1\n",
"LABEL_14 ##out 1\n",
" 1 1\n",
" budget 1\n",
" check 1\n",
" grocery 1\n",
" haunted 1\n",
" mars 1\n",
" nasa 1\n",
" store 1\n",
"LABEL_20 ##don 1\n",
" ##ia 1\n",
" cy 1\n",
"LABEL_24 , 1\n",
" 1998 1\n",
" 2001 1\n",
" 5 1\n",
" 8 1\n",
" april 2\n",
"LABEL_5 michael 1\n",
"LABEL_7 ##n 1\n",
" mali 1\n",
"LABEL_73 egypt 1\n",
" radio 1\n",
"LABEL_74 ##er 1\n",
" ##ion 1\n",
" orbit 1\n",
" talk 1\n",
"LABEL_75 lines 1\n",
" pharaoh 1\n",
" scientist 1\n",
" shows 1\n",
"dtype: int64"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the saved predictions of model 48 from disk and turn them into a DataFrame.\n",
"# `obtener_dataframe` is defined earlier in this notebook; the group-bys below rely on\n",
"# the resulting frame having 'entity' and 'word' columns.\n",
"with open(\"48 brettlindistilbert-base-uncased-finetuned-ner.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Number of rows per predicted entity label. Printed explicitly because only the\n",
"# last expression of a cell is displayed automatically.\n",
"print(aux.groupby(['entity']).size())\n",
"\n",
"# Number of rows per (entity, word) pair; this is the value the cell displays.\n",
"aux.groupby(['entity', 'word']).size()\n",
"\n",
"# The entities above are generic LABEL_<n> names. The list below appears to be the\n",
"# index -> tag mapping for <n> (TODO: confirm against the model's config.id2label).\n",
"# ['O', \t0\n",
"# 'B-O', \t1\n",
"# 'I-O', \t2\n",
"# 'L-O', \t3\n",
"# 'U-O', \t4\n",
"# 'B-PER', \t5\n",
"# 'I-PER', \t6\n",
"# 'L-PER', \t7\n",
"# 'U-PER', \t8\n",
"# 'B-LOC', \t9\n",
"# 'I-LOC', \t10\n",
"# 'L-LOC', \t11\n",
"# 'U-LOC', \t12\n",
"# 'B-ORG', \t13\n",
"# 'I-ORG', \t14\n",
"# 'L-ORG', \t15\n",
"# 'U-ORG', \t16\n",
"# 'B-NRP', \t17\n",
"# 'I-NRP', \t18\n",
"# 'L-NRP', \t19\n",
"# 'U-NRP', \t20\n",
"# 'B-DATE_TIME', \t21\n",
"# 'I-DATE_TIME', \t22\n",
"# 'L-DATE_TIME', \t23\n",
"# 'U-DATE_TIME', \t24\n",
"# 'B-CREDIT_CARD', \t25\n",
"# 'I-CREDIT_CARD', \t26\n",
"# 'L-CREDIT_CARD', \t27\n",
"# 'U-CREDIT_CARD', \t28\n",
"# 'B-URL', \t29\n",
"# 'I-URL', \t30\n",
"# 'L-URL', \t31\n",
"# 'U-URL', \t32\n",
"# 'B-IBAN_CODE', \t33\n",
"# 'I-IBAN_CODE', \t34\n",
"# 'L-IBAN_CODE', \t35\n",
"# 'U-IBAN_CODE', \t36\n",
"# 'B-US_BANK_NUMBER', \t37\n",
"# 'I-US_BANK_NUMBER', \t38\n",
"# 'L-US_BANK_NUMBER', \t39\n",
"# 'U-US_BANK_NUMBER', \t40\n",
"# 'B-PHONE_NUMBER', \t41\n",
"# 'I-PHONE_NUMBER', \t42\n",
"# 'L-PHONE_NUMBER', \t43\n",
"# 'U-PHONE_NUMBER', \t44\n",
"# 'B-US_SSN', \t45\n",
"# 'I-US_SSN', \t46\n",
"# 'L-US_SSN', \t47\n",
"# 'U-US_SSN', \t48\n",
"# 'B-US_PASSPORT', \t49\n",
"# 'I-US_PASSPORT', \t50\n",
"# 'L-US_PASSPORT', \t51\n",
"# 'U-US_PASSPORT', \t52\n",
"# 'B-US_DRIVER_LICENSE', \t53\n",
"# 'I-US_DRIVER_LICENSE', \t54\n",
"# 'L-US_DRIVER_LICENSE', \t55\n",
"# 'U-US_DRIVER_LICENSE', \t56\n",
"# 'B-US_LICENSE_PLATE', \t57\n",
"# 'I-US_LICENSE_PLATE', \t58\n",
"# 'L-US_LICENSE_PLATE', \t59\n",
"# 'U-US_LICENSE_PLATE', \t60\n",
"# 'B-IP_ADDRESS', \t61\n",
"# 'I-IP_ADDRESS', \t62\n",
"# 'L-IP_ADDRESS', \t63\n",
"# 'U-IP_ADDRESS', \t64\n",
"# 'B-US_ITIN', \t65\n",
"# 'I-US_ITIN', \t66\n",
"# 'L-US_ITIN', \t67\n",
"# 'U-US_ITIN', \t68\n",
"# 'B-EMAIL_ADDRESS', \t69\n",
"# 'I-EMAIL_ADDRESS', \t70\n",
"# 'L-EMAIL_ADDRESS', \t71\n",
"# 'U-EMAIL_ADDRESS', \t72\n",
"# 'B-TITLE', \t73\n",
"# 'I-TITLE', \t74\n",
"# 'L-TITLE', \t75\n",
"# 'U-TITLE', \t76\n",
"# 'B-COORDINATE', \t77\n",
"# 'I-COORDINATE', \t78\n",
"# 'L-COORDINATE', \t79\n",
"# 'U-COORDINATE', \t80\n",
"# 'B-IMEI', \t81\n",
"# 'I-IMEI', \t82\n",
"# 'L-IMEI', \t83\n",
"# 'U-IMEI', \t84\n",
"# 'B-PASSWORD', \t85\n",
"# 'I-PASSWORD', \t86\n",
"# 'L-PASSWORD', \t87\n",
"# 'U-PASSWORD', \t88\n",
"# 'B-LICENSE_PLATE', \t89\n",
"# 'I-LICENSE_PLATE', \t90\n",
"# 'L-LICENSE_PLATE', \t91\n",
"# 'U-LICENSE_PLATE', \t92\n",
"# 'B-CURRENCY', \t93\n",
"# 'I-CURRENCY', \t94\n",
"# 'L-CURRENCY', \t95\n",
"# 'U-CURRENCY', \t96\n",
"# 'B-FINANCIAL', \t97\n",
"# 'I-FINANCIAL', \t98\n",
"# 'L-FINANCIAL', \t99\n",
"# 'U-FINANCIAL', \t100\n",
"# 'B-ROUTING_NUMBER', \t101\n",
"# 'I-ROUTING_NUMBER', \t102\n",
"# 'L-ROUTING_NUMBER', \t103\n",
"# 'U-ROUTING_NUMBER', \t104\n",
"# 'B-SWIFT_CODE', \t105\n",
"# 'I-SWIFT_CODE', \t106\n",
"# 'L-SWIFT_CODE', \t107\n",
"# 'U-SWIFT_CODE', \t108\n",
"# 'B-MAC_ADDRESS', \t109\n",
"# 'I-MAC_ADDRESS', \t110\n",
"# 'L-MAC_ADDRESS', \t111\n",
"# 'U-MAC_ADDRESS', \t112\n",
"# 'B-AGE', \t113\n",
"# 'I-AGE', \t114\n",
"# 'L-AGE', \t115\n",
"# 'U-AGE']\t116\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "665c8f4a-5e6d-4c7a-9b4e-263d5181d0eb",
"metadata": {},
"source": [
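"## 49 papluca/xlm-roberta-base-language-detection\n",
"\n",
"**Caveat:** the loading warning in the output below reports that the token-classification head (`classifier.weight`, `classifier.bias`) was newly initialized for this checkpoint, so the per-token language tags and their low scores should not be read as meaningful predictions."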
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "9e192dd7-a08d-4f3f-a4ea-eb22adb0c755",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at papluca/xlm-roberta-base-language-detection and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'en',\n",
" 'score': 0.11591221,\n",
" 'index': 1,\n",
" 'word': '▁So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'en',\n",
" 'score': 0.124623634,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'el',\n",
" 'score': 0.12547147,\n",
" 'index': 3,\n",
" 'word': '▁if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'en',\n",
" 'score': 0.13911082,\n",
" 'index': 4,\n",
" 'word': '▁you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'en',\n",
" 'score': 0.17602253,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'el',\n",
" 'score': 0.11501573,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'en',\n",
" 'score': 0.14821951,\n",
" 'index': 7,\n",
" 'word': '▁a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'el',\n",
" 'score': 0.10118,\n",
" 'index': 8,\n",
" 'word': '▁NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'el',\n",
" 'score': 0.09721276,\n",
" 'index': 9,\n",
" 'word': '▁scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'el',\n",
" 'score': 0.12020393,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'el',\n",
" 'score': 0.13406067,\n",
" 'index': 11,\n",
" 'word': '▁you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'el',\n",
" 'score': 0.11300563,\n",
" 'index': 12,\n",
" 'word': '▁should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'en',\n",
" 'score': 0.14275809,\n",
" 'index': 13,\n",
" 'word': '▁be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'el',\n",
" 'score': 0.1305934,\n",
" 'index': 14,\n",
" 'word': '▁able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'en',\n",
" 'score': 0.16247976,\n",
" 'index': 15,\n",
" 'word': '▁to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'el',\n",
" 'score': 0.12143467,\n",
" 'index': 16,\n",
" 'word': '▁tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'el',\n",
" 'score': 0.109906994,\n",
" 'index': 17,\n",
" 'word': '▁me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'en',\n",
" 'score': 0.1670305,\n",
" 'index': 18,\n",
" 'word': '▁the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'en',\n",
" 'score': 0.13686182,\n",
" 'index': 19,\n",
" 'word': '▁whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'el',\n",
" 'score': 0.12123093,\n",
" 'index': 20,\n",
" 'word': '▁story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'en',\n",
" 'score': 0.15466973,\n",
" 'index': 21,\n",
" 'word': '▁about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'en',\n",
" 'score': 0.14879441,\n",
" 'index': 22,\n",
" 'word': '▁the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'en',\n",
" 'score': 0.106891826,\n",
" 'index': 23,\n",
" 'word': '▁Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'el',\n",
" 'score': 0.11462047,\n",
" 'index': 24,\n",
" 'word': '▁On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'en',\n",
" 'score': 0.09874061,\n",
" 'index': 25,\n",
" 'word': '▁Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'en',\n",
" 'score': 0.14187884,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'en',\n",
" 'score': 0.1268902,\n",
" 'index': 27,\n",
" 'word': '▁which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'el',\n",
" 'score': 0.09524864,\n",
" 'index': 28,\n",
" 'word': '▁obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': 'el',\n",
" 'score': 0.12556419,\n",
" 'index': 29,\n",
" 'word': '▁is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'el',\n",
" 'score': 0.13329445,\n",
" 'index': 30,\n",
" 'word': '▁evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'en',\n",
" 'score': 0.15930562,\n",
" 'index': 31,\n",
" 'word': '▁that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'el',\n",
" 'score': 0.13351129,\n",
" 'index': 32,\n",
" 'word': '▁there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'en',\n",
" 'score': 0.12728569,\n",
" 'index': 33,\n",
" 'word': '▁is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'el',\n",
" 'score': 0.11237473,\n",
" 'index': 34,\n",
" 'word': '▁life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'el',\n",
" 'score': 0.14296363,\n",
" 'index': 35,\n",
" 'word': '▁on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'en',\n",
" 'score': 0.09912568,\n",
" 'index': 36,\n",
" 'word': '▁Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'en',\n",
" 'score': 0.14278257,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'en',\n",
" 'score': 0.1724549,\n",
" 'index': 38,\n",
" 'word': '▁and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'en',\n",
" 'score': 0.14657633,\n",
" 'index': 39,\n",
" 'word': '▁that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'en',\n",
" 'score': 0.17009127,\n",
" 'index': 40,\n",
" 'word': '▁the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'en',\n",
" 'score': 0.12712897,\n",
" 'index': 41,\n",
" 'word': '▁face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'en',\n",
" 'score': 0.14602062,\n",
" 'index': 42,\n",
" 'word': '▁was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'el',\n",
" 'score': 0.12748437,\n",
" 'index': 43,\n",
" 'word': '▁created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'en',\n",
" 'score': 0.14597844,\n",
" 'index': 44,\n",
" 'word': '▁by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'el',\n",
" 'score': 0.094343364,\n",
" 'index': 45,\n",
" 'word': '▁alien',\n",
" 'start': 191,\n",
" 'end': 196},\n",
" {'entity': 'en',\n",
" 'score': 0.137794,\n",
" 'index': 46,\n",
" 'word': 's',\n",
" 'start': 196,\n",
" 'end': 197},\n",
" {'entity': 'el',\n",
" 'score': 0.119442455,\n",
" 'index': 47,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'el',\n",
" 'score': 0.08792857,\n",
" 'index': 48,\n",
" 'word': '▁correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'el',\n",
" 'score': 0.08977283,\n",
" 'index': 49,\n",
" 'word': '?\"',\n",
" 'start': 206,\n",
" 'end': 208},\n",
" {'entity': 'el',\n",
" 'score': 0.08942173,\n",
" 'index': 50,\n",
" 'word': '▁No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'el',\n",
" 'score': 0.11885595,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'en',\n",
" 'score': 0.128854,\n",
" 'index': 52,\n",
" 'word': '▁twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'en',\n",
" 'score': 0.14528814,\n",
" 'index': 53,\n",
" 'word': '▁five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'en',\n",
" 'score': 0.14489171,\n",
" 'index': 54,\n",
" 'word': '▁years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'en',\n",
" 'score': 0.1306482,\n",
" 'index': 55,\n",
" 'word': '▁ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'en',\n",
" 'score': 0.13290027,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'el',\n",
" 'score': 0.10796524,\n",
" 'index': 57,\n",
" 'word': '▁our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'el',\n",
" 'score': 0.08923595,\n",
" 'index': 58,\n",
" 'word': '▁Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'el',\n",
" 'score': 0.08884452,\n",
" 'index': 59,\n",
" 'word': '▁1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'el',\n",
" 'score': 0.11797657,\n",
" 'index': 60,\n",
" 'word': '▁space',\n",
" 'start': 249,\n",
" 'end': 254},\n",
" {'entity': 'el',\n",
" 'score': 0.10235624,\n",
" 'index': 61,\n",
" 'word': 'craft',\n",
" 'start': 254,\n",
" 'end': 259},\n",
" {'entity': 'el',\n",
" 'score': 0.12116124,\n",
" 'index': 62,\n",
" 'word': '▁was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'en',\n",
" 'score': 0.13153702,\n",
" 'index': 63,\n",
" 'word': '▁circ',\n",
" 'start': 264,\n",
" 'end': 268},\n",
" {'entity': 'en',\n",
" 'score': 0.15348472,\n",
" 'index': 64,\n",
" 'word': 'ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'en',\n",
" 'score': 0.16437925,\n",
" 'index': 65,\n",
" 'word': '▁the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'el',\n",
" 'score': 0.09529811,\n",
" 'index': 66,\n",
" 'word': '▁planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'el',\n",
" 'score': 0.11570662,\n",
" 'index': 67,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'en',\n",
" 'score': 0.10650027,\n",
" 'index': 68,\n",
" 'word': '▁sna',\n",
" 'start': 285,\n",
" 'end': 288},\n",
" {'entity': 'en',\n",
" 'score': 0.16732152,\n",
" 'index': 69,\n",
" 'word': 'pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'el',\n",
" 'score': 0.10069636,\n",
" 'index': 70,\n",
" 'word': '▁photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'en',\n",
" 'score': 0.12804745,\n",
" 'index': 71,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'el',\n",
" 'score': 0.12151808,\n",
" 'index': 72,\n",
" 'word': '▁when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'en',\n",
" 'score': 0.14805369,\n",
" 'index': 73,\n",
" 'word': '▁it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'en',\n",
" 'score': 0.13002725,\n",
" 'index': 74,\n",
" 'word': '▁spot',\n",
" 'start': 310,\n",
" 'end': 314},\n",
" {'entity': 'en',\n",
" 'score': 0.14732121,\n",
" 'index': 75,\n",
" 'word': 'ted',\n",
" 'start': 314,\n",
" 'end': 317},\n",
" {'entity': 'en',\n",
" 'score': 0.16823284,\n",
" 'index': 76,\n",
" 'word': '▁the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'el',\n",
" 'score': 0.10869224,\n",
" 'index': 77,\n",
" 'word': '▁shadow',\n",
" 'start': 322,\n",
" 'end': 328},\n",
" {'entity': 'en',\n",
" 'score': 0.1593602,\n",
" 'index': 78,\n",
" 'word': 'y',\n",
" 'start': 328,\n",
" 'end': 329},\n",
" {'entity': 'el',\n",
" 'score': 0.1302847,\n",
" 'index': 79,\n",
" 'word': '▁like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'en',\n",
" 'score': 0.13379778,\n",
" 'index': 80,\n",
" 'word': 'ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'en',\n",
" 'score': 0.15503198,\n",
" 'index': 81,\n",
" 'word': '▁of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'en',\n",
" 'score': 0.15058768,\n",
" 'index': 82,\n",
" 'word': '▁a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'el',\n",
" 'score': 0.110621594,\n",
" 'index': 83,\n",
" 'word': '▁human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'en',\n",
" 'score': 0.13605565,\n",
" 'index': 84,\n",
" 'word': '▁face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'en',\n",
" 'score': 0.15529045,\n",
" 'index': 85,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'el',\n",
" 'score': 0.100054726,\n",
" 'index': 86,\n",
" 'word': '▁Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'el',\n",
" 'score': 0.1203518,\n",
" 'index': 87,\n",
" 'word': '▁scientist',\n",
" 'start': 359,\n",
" 'end': 368},\n",
" {'entity': 'en',\n",
" 'score': 0.15678018,\n",
" 'index': 88,\n",
" 'word': 's',\n",
" 'start': 368,\n",
" 'end': 369},\n",
" {'entity': 'el',\n",
" 'score': 0.14597937,\n",
" 'index': 89,\n",
" 'word': '▁figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'en',\n",
" 'score': 0.14744708,\n",
" 'index': 90,\n",
" 'word': 'd',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'el',\n",
" 'score': 0.13703896,\n",
" 'index': 91,\n",
" 'word': '▁out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'el',\n",
" 'score': 0.13980067,\n",
" 'index': 92,\n",
" 'word': '▁that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'en',\n",
" 'score': 0.14452939,\n",
" 'index': 93,\n",
" 'word': '▁it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'el',\n",
" 'score': 0.13332519,\n",
" 'index': 94,\n",
" 'word': '▁was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'el',\n",
" 'score': 0.11484533,\n",
" 'index': 95,\n",
" 'word': '▁just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'ar',\n",
" 'score': 0.08993049,\n",
" 'index': 96,\n",
" 'word': '▁another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'ar',\n",
" 'score': 0.08672481,\n",
" 'index': 97,\n",
" 'word': '▁Marti',\n",
" 'start': 407,\n",
" 'end': 412},\n",
" {'entity': 'el',\n",
" 'score': 0.122489624,\n",
" 'index': 98,\n",
" 'word': 'an',\n",
" 'start': 412,\n",
" 'end': 414},\n",
" {'entity': 'el',\n",
" 'score': 0.08492366,\n",
" 'index': 99,\n",
" 'word': '▁mesa',\n",
" 'start': 415,\n",
" 'end': 419},\n",
" {'entity': 'el',\n",
" 'score': 0.11751986,\n",
" 'index': 100,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'el',\n",
" 'score': 0.097591884,\n",
" 'index': 101,\n",
" 'word': '▁common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'el',\n",
" 'score': 0.106916085,\n",
" 'index': 102,\n",
" 'word': '▁around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'en',\n",
" 'score': 0.120345294,\n",
" 'index': 103,\n",
" 'word': '▁Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'en',\n",
" 'score': 0.12486706,\n",
" 'index': 104,\n",
" 'word': 'do',\n",
" 'start': 437,\n",
" 'end': 439},\n",
" {'entity': 'el',\n",
" 'score': 0.11398356,\n",
" 'index': 105,\n",
" 'word': 'nia',\n",
" 'start': 439,\n",
" 'end': 442},\n",
" {'entity': 'en',\n",
" 'score': 0.12314375,\n",
" 'index': 106,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'el',\n",
" 'score': 0.09227569,\n",
" 'index': 107,\n",
" 'word': '▁only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'el',\n",
" 'score': 0.11441822,\n",
" 'index': 108,\n",
" 'word': '▁this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'el',\n",
" 'score': 0.13375378,\n",
" 'index': 109,\n",
" 'word': '▁one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'el',\n",
" 'score': 0.1260068,\n",
" 'index': 110,\n",
" 'word': '▁had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'el',\n",
" 'score': 0.0983968,\n",
" 'index': 111,\n",
" 'word': '▁shadow',\n",
" 'start': 462,\n",
" 'end': 468},\n",
" {'entity': 'en',\n",
" 'score': 0.14185685,\n",
" 'index': 112,\n",
" 'word': 's',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'en',\n",
" 'score': 0.14497927,\n",
" 'index': 113,\n",
" 'word': '▁that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'el',\n",
" 'score': 0.13247032,\n",
" 'index': 114,\n",
" 'word': '▁made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'el',\n",
" 'score': 0.13548398,\n",
" 'index': 115,\n",
" 'word': '▁it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'el',\n",
" 'score': 0.13423643,\n",
" 'index': 116,\n",
" 'word': '▁look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'el',\n",
" 'score': 0.1423772,\n",
" 'index': 117,\n",
" 'word': '▁like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'el',\n",
" 'score': 0.14577042,\n",
" 'index': 118,\n",
" 'word': '▁an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'ar',\n",
" 'score': 0.081988394,\n",
" 'index': 119,\n",
" 'word': '▁Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'el',\n",
" 'score': 0.080225624,\n",
" 'index': 120,\n",
" 'word': 'ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'el',\n",
" 'score': 0.09277659,\n",
" 'index': 121,\n",
" 'word': '▁Phar',\n",
" 'start': 505,\n",
" 'end': 509},\n",
" {'entity': 'en',\n",
" 'score': 0.17454442,\n",
" 'index': 122,\n",
" 'word': 'a',\n",
" 'start': 509,\n",
" 'end': 510},\n",
" {'entity': 'en',\n",
" 'score': 0.10499313,\n",
" 'index': 123,\n",
" 'word': 'oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'en',\n",
" 'score': 0.1622584,\n",
" 'index': 124,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'en',\n",
" 'score': 0.14269882,\n",
" 'index': 125,\n",
" 'word': '▁Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'el',\n",
" 'score': 0.1382166,\n",
" 'index': 126,\n",
" 'word': '▁few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'el',\n",
" 'score': 0.13428125,\n",
" 'index': 127,\n",
" 'word': '▁days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'el',\n",
" 'score': 0.13032675,\n",
" 'index': 128,\n",
" 'word': '▁later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'el',\n",
" 'score': 0.12615447,\n",
" 'index': 129,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'el',\n",
" 'score': 0.13707836,\n",
" 'index': 130,\n",
" 'word': '▁we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'el',\n",
" 'score': 0.11976686,\n",
" 'index': 131,\n",
" 'word': '▁reveal',\n",
" 'start': 538,\n",
" 'end': 544},\n",
" {'entity': 'en',\n",
" 'score': 0.1461132,\n",
" 'index': 132,\n",
" 'word': 'ed',\n",
" 'start': 544,\n",
" 'end': 546},\n",
" {'entity': 'en',\n",
" 'score': 0.15168166,\n",
" 'index': 133,\n",
" 'word': '▁the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'el',\n",
" 'score': 0.10999579,\n",
" 'index': 134,\n",
" 'word': '▁image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'el',\n",
" 'score': 0.13296223,\n",
" 'index': 135,\n",
" 'word': '▁for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'el',\n",
" 'score': 0.12075652,\n",
" 'index': 136,\n",
" 'word': '▁all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'en',\n",
" 'score': 0.14416823,\n",
" 'index': 137,\n",
" 'word': '▁to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'el',\n",
" 'score': 0.12048786,\n",
" 'index': 138,\n",
" 'word': '▁see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'en',\n",
" 'score': 0.14799197,\n",
" 'index': 139,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'en',\n",
" 'score': 0.1485873,\n",
" 'index': 140,\n",
" 'word': '▁and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'el',\n",
" 'score': 0.14215225,\n",
" 'index': 141,\n",
" 'word': '▁we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'el',\n",
" 'score': 0.13101134,\n",
" 'index': 142,\n",
" 'word': '▁made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'el',\n",
" 'score': 0.14018072,\n",
" 'index': 143,\n",
" 'word': '▁sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'en',\n",
" 'score': 0.15333681,\n",
" 'index': 144,\n",
" 'word': '▁to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'el',\n",
" 'score': 0.11945983,\n",
" 'index': 145,\n",
" 'word': '▁note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'el',\n",
" 'score': 0.13811947,\n",
" 'index': 146,\n",
" 'word': '▁that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'en',\n",
" 'score': 0.14908531,\n",
" 'index': 147,\n",
" 'word': '▁it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'el',\n",
" 'score': 0.13367933,\n",
" 'index': 148,\n",
" 'word': '▁was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'en',\n",
" 'score': 0.1512028,\n",
" 'index': 149,\n",
" 'word': '▁a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'el',\n",
" 'score': 0.12587915,\n",
" 'index': 150,\n",
" 'word': '▁huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'el',\n",
" 'score': 0.0964705,\n",
" 'index': 151,\n",
" 'word': '▁rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'el',\n",
" 'score': 0.11105906,\n",
" 'index': 152,\n",
" 'word': '▁formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'en',\n",
" 'score': 0.14971292,\n",
" 'index': 153,\n",
" 'word': '▁that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'el',\n",
" 'score': 0.14679578,\n",
" 'index': 154,\n",
" 'word': '▁just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'en',\n",
" 'score': 0.15699105,\n",
" 'index': 155,\n",
" 'word': '▁rese',\n",
" 'start': 642,\n",
" 'end': 646},\n",
" {'entity': 'en',\n",
" 'score': 0.15588175,\n",
" 'index': 156,\n",
" 'word': 'mble',\n",
" 'start': 646,\n",
" 'end': 650},\n",
" {'entity': 'en',\n",
" 'score': 0.15559243,\n",
" 'index': 157,\n",
" 'word': 'd',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'el',\n",
" 'score': 0.14160416,\n",
" 'index': 158,\n",
" 'word': '▁a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'el',\n",
" 'score': 0.11545096,\n",
" 'index': 159,\n",
" 'word': '▁human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'el',\n",
" 'score': 0.09418173,\n",
" 'index': 160,\n",
" 'word': '▁head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'en',\n",
" 'score': 0.15333837,\n",
" 'index': 161,\n",
" 'word': '▁and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'en',\n",
" 'score': 0.1189025,\n",
" 'index': 162,\n",
" 'word': '▁face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'en',\n",
" 'score': 0.15204029,\n",
" 'index': 163,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'el',\n",
" 'score': 0.1288495,\n",
" 'index': 164,\n",
" 'word': '▁but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'el',\n",
" 'score': 0.12995465,\n",
" 'index': 165,\n",
" 'word': '▁all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'en',\n",
" 'score': 0.15353577,\n",
" 'index': 166,\n",
" 'word': '▁of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'en',\n",
" 'score': 0.152363,\n",
" 'index': 167,\n",
" 'word': '▁it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'en',\n",
" 'score': 0.1399179,\n",
" 'index': 168,\n",
" 'word': '▁was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'en',\n",
" 'score': 0.1595559,\n",
" 'index': 169,\n",
" 'word': '▁for',\n",
" 'start': 693,\n",
" 'end': 696},\n",
" {'entity': 'en',\n",
" 'score': 0.15899158,\n",
" 'index': 170,\n",
" 'word': 'med',\n",
" 'start': 696,\n",
" 'end': 699},\n",
" {'entity': 'el',\n",
" 'score': 0.121669054,\n",
" 'index': 171,\n",
" 'word': '▁by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'en',\n",
" 'score': 0.106091,\n",
" 'index': 172,\n",
" 'word': '▁shadow',\n",
" 'start': 703,\n",
" 'end': 709},\n",
" {'entity': 'en',\n",
" 'score': 0.14808753,\n",
" 'index': 173,\n",
" 'word': 's',\n",
" 'start': 709,\n",
" 'end': 710},\n",
" {'entity': 'en',\n",
" 'score': 0.16585921,\n",
" 'index': 174,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'en',\n",
" 'score': 0.1352026,\n",
" 'index': 175,\n",
" 'word': '▁We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'el',\n",
" 'score': 0.11284589,\n",
" 'index': 176,\n",
" 'word': '▁only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'en',\n",
" 'score': 0.09965902,\n",
" 'index': 177,\n",
" 'word': '▁announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'en',\n",
" 'score': 0.15637,\n",
" 'index': 178,\n",
" 'word': '▁it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'el',\n",
" 'score': 0.12986624,\n",
" 'index': 179,\n",
" 'word': '▁because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'el',\n",
" 'score': 0.14423794,\n",
" 'index': 180,\n",
" 'word': '▁we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'el',\n",
" 'score': 0.1403598,\n",
" 'index': 181,\n",
" 'word': '▁thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'en',\n",
" 'score': 0.1668672,\n",
" 'index': 182,\n",
" 'word': '▁it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'el',\n",
" 'score': 0.13016574,\n",
" 'index': 183,\n",
" 'word': '▁would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'el',\n",
" 'score': 0.14227276,\n",
" 'index': 184,\n",
" 'word': '▁be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'el',\n",
" 'score': 0.14242609,\n",
" 'index': 185,\n",
" 'word': '▁a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'el',\n",
" 'score': 0.14193374,\n",
" 'index': 186,\n",
" 'word': '▁good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'el',\n",
" 'score': 0.13710198,\n",
" 'index': 187,\n",
" 'word': '▁way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'el',\n",
" 'score': 0.14386745,\n",
" 'index': 188,\n",
" 'word': '▁to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'el',\n",
" 'score': 0.11916898,\n",
" 'index': 189,\n",
" 'word': '▁engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'en',\n",
" 'score': 0.1367849,\n",
" 'index': 190,\n",
" 'word': '▁the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'el',\n",
" 'score': 0.12032364,\n",
" 'index': 191,\n",
" 'word': '▁public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'el',\n",
" 'score': 0.11247768,\n",
" 'index': 192,\n",
" 'word': '▁with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'el',\n",
" 'score': 0.10595911,\n",
" 'index': 193,\n",
" 'word': '▁NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'en',\n",
" 'score': 0.15294623,\n",
" 'index': 194,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'en',\n",
" 'score': 0.14347178,\n",
" 'index': 195,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'el',\n",
" 'score': 0.111513235,\n",
" 'index': 196,\n",
" 'word': '▁finding',\n",
" 'start': 808,\n",
" 'end': 815},\n",
" {'entity': 'en',\n",
" 'score': 0.13011272,\n",
" 'index': 197,\n",
" 'word': 's',\n",
" 'start': 815,\n",
" 'end': 816},\n",
" {'entity': 'en',\n",
" 'score': 0.1234625,\n",
" 'index': 198,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'en',\n",
" 'score': 0.13722092,\n",
" 'index': 199,\n",
" 'word': '▁and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'en',\n",
" 'score': 0.15018424,\n",
" 'index': 200,\n",
" 'word': '▁at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'en',\n",
" 'score': 0.12469069,\n",
" 'index': 201,\n",
" 'word': 'rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'en',\n",
" 'score': 0.13173658,\n",
" 'index': 202,\n",
" 'word': 'ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'el',\n",
" 'score': 0.089053564,\n",
" 'index': 203,\n",
" 'word': '▁attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'el',\n",
" 'score': 0.1162944,\n",
" 'index': 204,\n",
" 'word': '▁to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'el',\n",
" 'score': 0.08081766,\n",
" 'index': 205,\n",
" 'word': '▁Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'en',\n",
" 'score': 0.16521907,\n",
" 'index': 206,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'en',\n",
" 'score': 0.13379842,\n",
" 'index': 207,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'el',\n",
" 'score': 0.12595442,\n",
" 'index': 208,\n",
" 'word': '▁and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'en',\n",
" 'score': 0.14046155,\n",
" 'index': 209,\n",
" 'word': '▁it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'el',\n",
" 'score': 0.13404858,\n",
" 'index': 210,\n",
" 'word': '▁did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'en',\n",
" 'score': 0.16926748,\n",
" 'index': 211,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'en',\n",
" 'score': 0.16880788,\n",
" 'index': 212,\n",
" 'word': '▁The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'en',\n",
" 'score': 0.13957304,\n",
" 'index': 213,\n",
" 'word': '▁face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'el',\n",
" 'score': 0.13834707,\n",
" 'index': 214,\n",
" 'word': '▁on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'en',\n",
" 'score': 0.09909718,\n",
" 'index': 215,\n",
" 'word': '▁Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'el',\n",
" 'score': 0.12846456,\n",
" 'index': 216,\n",
" 'word': '▁soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'el',\n",
" 'score': 0.11763253,\n",
" 'index': 217,\n",
" 'word': '▁became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'en',\n",
" 'score': 0.1485309,\n",
" 'index': 218,\n",
" 'word': '▁a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'el',\n",
" 'score': 0.09540079,\n",
" 'index': 219,\n",
" 'word': '▁pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'el',\n",
" 'score': 0.099501655,\n",
" 'index': 220,\n",
" 'word': '▁icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'en',\n",
" 'score': 0.13926134,\n",
" 'index': 221,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'en',\n",
" 'score': 0.12448082,\n",
" 'index': 222,\n",
" 'word': '▁shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'en',\n",
" 'score': 0.14647718,\n",
" 'index': 223,\n",
" 'word': '▁in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'en',\n",
" 'score': 0.10021849,\n",
" 'index': 224,\n",
" 'word': '▁movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'en',\n",
" 'score': 0.12970805,\n",
" 'index': 225,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'en',\n",
" 'score': 0.15426567,\n",
" 'index': 226,\n",
" 'word': '▁appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'en',\n",
" 'score': 0.14625426,\n",
" 'index': 227,\n",
" 'word': '▁in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'el',\n",
" 'score': 0.11217332,\n",
" 'index': 228,\n",
" 'word': '▁books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'el',\n",
" 'score': 0.117495194,\n",
" 'index': 229,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'el',\n",
" 'score': 0.12197139,\n",
" 'index': 230,\n",
" 'word': '▁magazine',\n",
" 'start': 939,\n",
" 'end': 947},\n",
" {'entity': 'en',\n",
" 'score': 0.17034268,\n",
" 'index': 231,\n",
" 'word': 's',\n",
" 'start': 947,\n",
" 'end': 948},\n",
" {'entity': 'el',\n",
" 'score': 0.12415104,\n",
" 'index': 232,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'el',\n",
" 'score': 0.12769704,\n",
" 'index': 233,\n",
" 'word': '▁radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'el',\n",
" 'score': 0.12205196,\n",
" 'index': 234,\n",
" 'word': '▁talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'el',\n",
" 'score': 0.12856682,\n",
" 'index': 235,\n",
" 'word': '▁shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'en',\n",
" 'score': 0.13193049,\n",
" 'index': 236,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'en',\n",
" 'score': 0.15642078,\n",
" 'index': 237,\n",
" 'word': '▁and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'en',\n",
" 'score': 0.15124984,\n",
" 'index': 238,\n",
" 'word': '▁ha',\n",
" 'start': 972,\n",
" 'end': 974},\n",
" {'entity': 'en',\n",
" 'score': 0.1550119,\n",
" 'index': 239,\n",
" 'word': 'un',\n",
" 'start': 974,\n",
" 'end': 976},\n",
" {'entity': 'en',\n",
" 'score': 0.14671806,\n",
" 'index': 240,\n",
" 'word': 'ted',\n",
" 'start': 976,\n",
" 'end': 979},\n",
" {'entity': 'en',\n",
" 'score': 0.1287589,\n",
" 'index': 241,\n",
" 'word': '▁gro',\n",
" 'start': 980,\n",
" 'end': 983},\n",
" {'entity': 'el',\n",
" 'score': 0.12542117,\n",
" 'index': 242,\n",
" 'word': 'cer',\n",
" 'start': 983,\n",
" 'end': 986},\n",
" {'entity': 'en',\n",
" 'score': 0.16864416,\n",
" 'index': 243,\n",
" 'word': 'y',\n",
" 'start': 986,\n",
" 'end': 987},\n",
" {'entity': 'el',\n",
" 'score': 0.11950577,\n",
" 'index': 244,\n",
" 'word': '▁store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'el',\n",
" 'score': 0.12966508,\n",
" 'index': 245,\n",
" 'word': '▁check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'el',\n",
" 'score': 0.12523231,\n",
" 'index': 246,\n",
" 'word': 'out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'el',\n",
" 'score': 0.10310095,\n",
" 'index': 247,\n",
" 'word': '▁lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'el',\n",
" 'score': 0.13300535,\n",
" 'index': 248,\n",
" 'word': '▁for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'el',\n",
" 'score': 0.116698794,\n",
" 'index': 249,\n",
" 'word': '▁25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'el',\n",
" 'score': 0.13448924,\n",
" 'index': 250,\n",
" 'word': '▁years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'en',\n",
" 'score': 0.16032621,\n",
" 'index': 251,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'el',\n",
" 'score': 0.104690254,\n",
" 'index': 252,\n",
" 'word': '▁Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'el',\n",
" 'score': 0.12012685,\n",
" 'index': 253,\n",
" 'word': '▁people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'el',\n",
" 'score': 0.12279296,\n",
" 'index': 254,\n",
" 'word': '▁thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'en',\n",
" 'score': 0.14976098,\n",
" 'index': 255,\n",
" 'word': '▁the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'el',\n",
" 'score': 0.10598802,\n",
" 'index': 256,\n",
" 'word': '▁natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'el',\n",
" 'score': 0.10722617,\n",
" 'index': 257,\n",
" 'word': '▁land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'en',\n",
" 'score': 0.11747365,\n",
" 'index': 258,\n",
" 'word': 'form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'el',\n",
" 'score': 0.12782969,\n",
" 'index': 259,\n",
" 'word': '▁was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'el',\n",
" 'score': 0.13294275,\n",
" 'index': 260,\n",
" 'word': '▁evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'en',\n",
" 'score': 0.16003017,\n",
" 'index': 261,\n",
" 'word': '▁of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'en',\n",
" 'score': 0.13127412,\n",
" 'index': 262,\n",
" 'word': '▁life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'el',\n",
" 'score': 0.14569594,\n",
" 'index': 263,\n",
" 'word': '▁on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'el',\n",
" 'score': 0.086975396,\n",
" 'index': 264,\n",
" 'word': '▁Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'en',\n",
" 'score': 0.13438605,\n",
" 'index': 265,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'en',\n",
" 'score': 0.15218768,\n",
" 'index': 266,\n",
" 'word': '▁and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'el',\n",
" 'score': 0.11441549,\n",
" 'index': 267,\n",
" 'word': '▁that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'el',\n",
" 'score': 0.10406643,\n",
" 'index': 268,\n",
" 'word': '▁us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'el',\n",
" 'score': 0.1157087,\n",
" 'index': 269,\n",
" 'word': '▁scientist',\n",
" 'start': 1106,\n",
" 'end': 1115},\n",
" {'entity': 'en',\n",
" 'score': 0.16390227,\n",
" 'index': 270,\n",
" 'word': 's',\n",
" 'start': 1115,\n",
" 'end': 1116},\n",
" {'entity': 'el',\n",
" 'score': 0.12261817,\n",
" 'index': 271,\n",
" 'word': '▁wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'en',\n",
" 'score': 0.17378253,\n",
" 'index': 272,\n",
" 'word': '▁to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'en',\n",
" 'score': 0.14640202,\n",
" 'index': 273,\n",
" 'word': '▁hi',\n",
" 'start': 1127,\n",
" 'end': 1129},\n",
" {'entity': 'en',\n",
" 'score': 0.14813536,\n",
" 'index': 274,\n",
" 'word': 'de',\n",
" 'start': 1129,\n",
" 'end': 1131},\n",
" {'entity': 'en',\n",
" 'score': 0.13482781,\n",
" 'index': 275,\n",
" 'word': '▁it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'en',\n",
" 'score': 0.1534762,\n",
" 'index': 276,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'el',\n",
" 'score': 0.12395833,\n",
" 'index': 277,\n",
" 'word': '▁but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'el',\n",
" 'score': 0.0911575,\n",
" 'index': 278,\n",
" 'word': '▁really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'el',\n",
" 'score': 0.12752935,\n",
" 'index': 279,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'el',\n",
" 'score': 0.13102081,\n",
" 'index': 280,\n",
" 'word': '▁the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'el',\n",
" 'score': 0.09118256,\n",
" 'index': 281,\n",
" 'word': '▁defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'el',\n",
" 'score': 0.12889816,\n",
" 'index': 282,\n",
" 'word': 's',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'el',\n",
" 'score': 0.131198,\n",
" 'index': 283,\n",
" 'word': '▁of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'el',\n",
" 'score': 0.13177402,\n",
" 'index': 284,\n",
" 'word': '▁the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'el',\n",
" 'score': 0.0976391,\n",
" 'index': 285,\n",
" 'word': '▁NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'ja',\n",
" 'score': 0.077623695,\n",
" 'index': 286,\n",
" 'word': '▁budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'el',\n",
" 'score': 0.10235032,\n",
" 'index': 287,\n",
" 'word': '▁wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'el',\n",
" 'score': 0.11458244,\n",
" 'index': 288,\n",
" 'word': '▁there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'el',\n",
" 'score': 0.12292923,\n",
" 'index': 289,\n",
" 'word': '▁was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'en',\n",
" 'score': 0.13890299,\n",
" 'index': 290,\n",
" 'word': '▁an',\n",
" 'start': 1196,\n",
" 'end': 1198},\n",
" {'entity': 'el',\n",
" 'score': 0.12614945,\n",
" 'index': 291,\n",
" 'word': 'cient',\n",
" 'start': 1198,\n",
" 'end': 1203},\n",
" {'entity': 'el',\n",
" 'score': 0.098485425,\n",
" 'index': 292,\n",
" 'word': '▁civiliza',\n",
" 'start': 1204,\n",
" 'end': 1212},\n",
" {'entity': 'en',\n",
" 'score': 0.13695684,\n",
" 'index': 293,\n",
" 'word': 'tion',\n",
" 'start': 1212,\n",
" 'end': 1216},\n",
" {'entity': 'el',\n",
" 'score': 0.12930857,\n",
" 'index': 294,\n",
" 'word': '▁on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'en',\n",
" 'score': 0.091095164,\n",
" 'index': 295,\n",
" 'word': '▁Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'en',\n",
" 'score': 0.15858984,\n",
" 'index': 296,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'en',\n",
" 'score': 0.12306447,\n",
" 'index': 297,\n",
" 'word': '▁We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'el',\n",
" 'score': 0.12401779,\n",
" 'index': 298,\n",
" 'word': '▁decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'en',\n",
" 'score': 0.1532955,\n",
" 'index': 299,\n",
" 'word': '▁to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'el',\n",
" 'score': 0.10732532,\n",
" 'index': 300,\n",
" 'word': '▁take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'el',\n",
" 'score': 0.10733517,\n",
" 'index': 301,\n",
" 'word': '▁another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'el',\n",
" 'score': 0.101071455,\n",
" 'index': 302,\n",
" 'word': '▁shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'el',\n",
" 'score': 0.11504756,\n",
" 'index': 303,\n",
" 'word': '▁just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'en',\n",
" 'score': 0.16051938,\n",
" 'index': 304,\n",
" 'word': '▁to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'en',\n",
" 'score': 0.1526192,\n",
" 'index': 305,\n",
" 'word': '▁make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'el',\n",
" 'score': 0.1414837,\n",
" 'index': 306,\n",
" 'word': '▁sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'el',\n",
" 'score': 0.13466269,\n",
" 'index': 307,\n",
" 'word': '▁we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'en',\n",
" 'score': 0.15096475,\n",
" 'index': 308,\n",
" 'word': '▁were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'en',\n",
" 'score': 0.1969613,\n",
" 'index': 309,\n",
" 'word': 'n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'en',\n",
" 'score': 0.15865912,\n",
" 'index': 310,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'en',\n",
" 'score': 0.16572025,\n",
" 'index': 311,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'el',\n",
" 'score': 0.12193232,\n",
" 'index': 312,\n",
" 'word': '▁wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'el',\n",
" 'score': 0.10861691,\n",
" 'index': 313,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'el',\n",
" 'score': 0.12936045,\n",
" 'index': 314,\n",
" 'word': '▁on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'en',\n",
" 'score': 0.10274368,\n",
" 'index': 315,\n",
" 'word': '▁April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'el',\n",
" 'score': 0.11178034,\n",
" 'index': 316,\n",
" 'word': '▁5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'en',\n",
" 'score': 0.12274597,\n",
" 'index': 317,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'en',\n",
" 'score': 0.13640046,\n",
" 'index': 318,\n",
" 'word': '▁1998.',\n",
" 'start': 1306,\n",
" 'end': 1311},\n",
" {'entity': 'el',\n",
" 'score': 0.09738847,\n",
" 'index': 319,\n",
" 'word': '▁Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'ar',\n",
" 'score': 0.0863802,\n",
" 'index': 320,\n",
" 'word': '▁Malin',\n",
" 'start': 1320,\n",
" 'end': 1325},\n",
" {'entity': 'el',\n",
" 'score': 0.13613664,\n",
" 'index': 321,\n",
" 'word': '▁and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'el',\n",
" 'score': 0.12056482,\n",
" 'index': 322,\n",
" 'word': '▁his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'el',\n",
" 'score': 0.084936745,\n",
" 'index': 323,\n",
" 'word': '▁Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'en',\n",
" 'score': 0.09353773,\n",
" 'index': 324,\n",
" 'word': '▁Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'el',\n",
" 'score': 0.09600473,\n",
" 'index': 325,\n",
" 'word': 'bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'en',\n",
" 'score': 0.13925415,\n",
" 'index': 326,\n",
" 'word': 'er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'el',\n",
" 'score': 0.106588826,\n",
" 'index': 327,\n",
" 'word': '▁camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'el',\n",
" 'score': 0.11866151,\n",
" 'index': 328,\n",
" 'word': '▁team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'el',\n",
" 'score': 0.1267877,\n",
" 'index': 329,\n",
" 'word': '▁took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'en',\n",
" 'score': 0.1520372,\n",
" 'index': 330,\n",
" 'word': '▁a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'el',\n",
" 'score': 0.11103296,\n",
" 'index': 331,\n",
" 'word': '▁picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'en',\n",
" 'score': 0.1478278,\n",
" 'index': 332,\n",
" 'word': '▁that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'el',\n",
" 'score': 0.13980234,\n",
" 'index': 333,\n",
" 'word': '▁was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'el',\n",
" 'score': 0.14334293,\n",
" 'index': 334,\n",
" 'word': '▁ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'el',\n",
" 'score': 0.17359938,\n",
" 'index': 335,\n",
" 'word': '▁times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'el',\n",
" 'score': 0.115307085,\n",
" 'index': 336,\n",
" 'word': '▁sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'el',\n",
" 'score': 0.13487983,\n",
" 'index': 337,\n",
" 'word': 'er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'el',\n",
" 'score': 0.15190518,\n",
" 'index': 338,\n",
" 'word': '▁than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'en',\n",
" 'score': 0.16202395,\n",
" 'index': 339,\n",
" 'word': '▁the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'el',\n",
" 'score': 0.12381832,\n",
" 'index': 340,\n",
" 'word': '▁original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'el',\n",
" 'score': 0.07887648,\n",
" 'index': 341,\n",
" 'word': '▁Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'el',\n",
" 'score': 0.1085552,\n",
" 'index': 342,\n",
" 'word': '▁photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'el',\n",
" 'score': 0.12933005,\n",
" 'index': 343,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'el',\n",
" 'score': 0.13211969,\n",
" 'index': 344,\n",
" 'word': '▁reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'en',\n",
" 'score': 0.13877197,\n",
" 'index': 345,\n",
" 'word': 'ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'el',\n",
" 'score': 0.14274132,\n",
" 'index': 346,\n",
" 'word': '▁a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'el',\n",
" 'score': 0.12289959,\n",
" 'index': 347,\n",
" 'word': '▁natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'el',\n",
" 'score': 0.11685172,\n",
" 'index': 348,\n",
" 'word': '▁land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'el',\n",
" 'score': 0.1232755,\n",
" 'index': 349,\n",
" 'word': 'form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'el',\n",
" 'score': 0.12781556,\n",
" 'index': 350,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'en',\n",
" 'score': 0.15072589,\n",
" 'index': 351,\n",
" 'word': '▁which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'el',\n",
" 'score': 0.11072004,\n",
" 'index': 352,\n",
" 'word': '▁meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'el',\n",
" 'score': 0.12505238,\n",
" 'index': 353,\n",
" 'word': '▁no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'ar',\n",
" 'score': 0.08642981,\n",
" 'index': 354,\n",
" 'word': '▁alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'pl',\n",
" 'score': 0.075424016,\n",
" 'index': 355,\n",
" 'word': '▁monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'en',\n",
" 'score': 0.15359755,\n",
" 'index': 356,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'el',\n",
" 'score': 0.11350483,\n",
" 'index': 357,\n",
" 'word': '▁\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'el',\n",
" 'score': 0.11290481,\n",
" 'index': 358,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'el',\n",
" 'score': 0.12361978,\n",
" 'index': 359,\n",
" 'word': '▁that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'el',\n",
" 'score': 0.11042566,\n",
" 'index': 360,\n",
" 'word': '▁picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'en',\n",
" 'score': 0.13758177,\n",
" 'index': 361,\n",
" 'word': '▁wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'en',\n",
" 'score': 0.14231738,\n",
" 'index': 362,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'en',\n",
" 'score': 0.1495189,\n",
" 'index': 363,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'el',\n",
" 'score': 0.13938442,\n",
" 'index': 364,\n",
" 'word': '▁very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'el',\n",
" 'score': 0.13038687,\n",
" 'index': 365,\n",
" 'word': '▁clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'el',\n",
" 'score': 0.144432,\n",
" 'index': 366,\n",
" 'word': '▁at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'el',\n",
" 'score': 0.15075718,\n",
" 'index': 367,\n",
" 'word': '▁all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'el',\n",
" 'score': 0.12753487,\n",
" 'index': 368,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'el',\n",
" 'score': 0.13230762,\n",
" 'index': 369,\n",
" 'word': '▁which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'el',\n",
" 'score': 0.12133482,\n",
" 'index': 370,\n",
" 'word': '▁could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'el',\n",
" 'score': 0.15801905,\n",
" 'index': 371,\n",
" 'word': '▁mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'el',\n",
" 'score': 0.09326559,\n",
" 'index': 372,\n",
" 'word': '▁alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'el',\n",
" 'score': 0.112567335,\n",
" 'index': 373,\n",
" 'word': '▁mark',\n",
" 'start': 1562,\n",
" 'end': 1566},\n",
" {'entity': 'el',\n",
" 'score': 0.13536157,\n",
" 'index': 374,\n",
" 'word': 'ings',\n",
" 'start': 1566,\n",
" 'end': 1570},\n",
" {'entity': 'el',\n",
" 'score': 0.13931376,\n",
" 'index': 375,\n",
" 'word': '▁were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'el',\n",
" 'score': 0.1297942,\n",
" 'index': 376,\n",
" 'word': '▁hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'el',\n",
" 'score': 0.12882504,\n",
" 'index': 377,\n",
" 'word': '▁by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'en',\n",
" 'score': 0.13760869,\n",
" 'index': 378,\n",
" 'word': '▁ha',\n",
" 'start': 1586,\n",
" 'end': 1588},\n",
" {'entity': 'en',\n",
" 'score': 0.11820038,\n",
" 'index': 379,\n",
" 'word': 'ze',\n",
" 'start': 1588,\n",
" 'end': 1590},\n",
" {'entity': 'en',\n",
" 'score': 0.12755689,\n",
" 'index': 380,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'el',\n",
" 'score': 0.08038798,\n",
" 'index': 381,\n",
" 'word': '▁Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'el',\n",
" 'score': 0.106813006,\n",
" 'index': 382,\n",
" 'word': '▁no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'el',\n",
" 'score': 0.13041495,\n",
" 'index': 383,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'el',\n",
" 'score': 0.100906804,\n",
" 'index': 384,\n",
" 'word': '▁yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': 'el',\n",
" 'score': 0.11085139,\n",
" 'index': 385,\n",
" 'word': '▁that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'el',\n",
" 'score': 0.090841085,\n",
" 'index': 386,\n",
" 'word': '▁rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'el',\n",
" 'score': 0.0912762,\n",
" 'index': 387,\n",
" 'word': '▁started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'en',\n",
" 'score': 0.15546727,\n",
" 'index': 388,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'el',\n",
" 'score': 0.12896377,\n",
" 'index': 389,\n",
" 'word': '▁but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'el',\n",
" 'score': 0.14137326,\n",
" 'index': 390,\n",
" 'word': '▁to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'el',\n",
" 'score': 0.1308083,\n",
" 'index': 391,\n",
" 'word': '▁prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'el',\n",
" 'score': 0.120038114,\n",
" 'index': 392,\n",
" 'word': '▁them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'el',\n",
" 'score': 0.12024542,\n",
" 'index': 393,\n",
" 'word': '▁wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'el',\n",
" 'score': 0.13746382,\n",
" 'index': 394,\n",
" 'word': '▁on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'el',\n",
" 'score': 0.10328479,\n",
" 'index': 395,\n",
" 'word': '▁April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'el',\n",
" 'score': 0.11870165,\n",
" 'index': 396,\n",
" 'word': '▁8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'el',\n",
" 'score': 0.1222689,\n",
" 'index': 397,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'el',\n",
" 'score': 0.109291695,\n",
" 'index': 398,\n",
" 'word': '▁2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'el',\n",
" 'score': 0.13815217,\n",
" 'index': 399,\n",
" 'word': '▁we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'el',\n",
" 'score': 0.14033255,\n",
" 'index': 400,\n",
" 'word': '▁decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'en',\n",
" 'score': 0.15627232,\n",
" 'index': 401,\n",
" 'word': '▁to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'el',\n",
" 'score': 0.11807532,\n",
" 'index': 402,\n",
" 'word': '▁take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'el',\n",
" 'score': 0.11256976,\n",
" 'index': 403,\n",
" 'word': '▁another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'el',\n",
" 'score': 0.10331523,\n",
" 'index': 404,\n",
" 'word': '▁picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'el',\n",
" 'score': 0.13425903,\n",
" 'index': 405,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'el',\n",
" 'score': 0.1378113,\n",
" 'index': 406,\n",
" 'word': '▁making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'el',\n",
" 'score': 0.15436757,\n",
" 'index': 407,\n",
" 'word': '▁sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'en',\n",
" 'score': 0.15380576,\n",
" 'index': 408,\n",
" 'word': '▁it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'el',\n",
" 'score': 0.13963144,\n",
" 'index': 409,\n",
" 'word': '▁was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'en',\n",
" 'score': 0.14155927,\n",
" 'index': 410,\n",
" 'word': '▁a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'en',\n",
" 'score': 0.09928071,\n",
" 'index': 411,\n",
" 'word': '▁cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'el',\n",
" 'score': 0.12012173,\n",
" 'index': 412,\n",
" 'word': 'less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'el',\n",
" 'score': 0.10342507,\n",
" 'index': 413,\n",
" 'word': '▁summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'el',\n",
" 'score': 0.12684572,\n",
" 'index': 414,\n",
" 'word': '▁day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'en',\n",
" 'score': 0.1442453,\n",
" 'index': 415,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'el',\n",
" 'score': 0.087297305,\n",
" 'index': 416,\n",
" 'word': '▁Malin',\n",
" 'start': 1745,\n",
" 'end': 1750},\n",
" {'entity': 'en',\n",
" 'score': 0.13008924,\n",
" 'index': 417,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'en',\n",
" 'score': 0.13482796,\n",
" 'index': 418,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'el',\n",
" 'score': 0.114518456,\n",
" 'index': 419,\n",
" 'word': '▁team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'el',\n",
" 'score': 0.12293864,\n",
" 'index': 420,\n",
" 'word': '▁capture',\n",
" 'start': 1758,\n",
" 'end': 1765},\n",
" {'entity': 'el',\n",
" 'score': 0.13779704,\n",
" 'index': 421,\n",
" 'word': 'd',\n",
" 'start': 1765,\n",
" 'end': 1766},\n",
" {'entity': 'el',\n",
" 'score': 0.1461829,\n",
" 'index': 422,\n",
" 'word': '▁an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'el',\n",
" 'score': 0.11995733,\n",
" 'index': 423,\n",
" 'word': '▁amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': 'el',\n",
" 'score': 0.10462855,\n",
" 'index': 424,\n",
" 'word': '▁photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'el',\n",
" 'score': 0.11723955,\n",
" 'index': 425,\n",
" 'word': '▁using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'en',\n",
" 'score': 0.14316602,\n",
" 'index': 426,\n",
" 'word': '▁the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'el',\n",
" 'score': 0.113886684,\n",
" 'index': 427,\n",
" 'word': '▁camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'el',\n",
" 'score': 0.117555104,\n",
" 'index': 428,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'el',\n",
" 'score': 0.12543674,\n",
" 'index': 429,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'el',\n",
" 'score': 0.08178501,\n",
" 'index': 430,\n",
" 'word': '▁absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'ar',\n",
" 'score': 0.084494546,\n",
" 'index': 431,\n",
" 'word': '▁maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'ar',\n",
" 'score': 0.07143905,\n",
" 'index': 432,\n",
" 'word': '▁revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'en',\n",
" 'score': 0.12196331,\n",
" 'index': 433,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'el',\n",
" 'score': 0.10553519,\n",
" 'index': 434,\n",
" 'word': '▁With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'el',\n",
" 'score': 0.12206387,\n",
" 'index': 435,\n",
" 'word': '▁this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'el',\n",
" 'score': 0.10974171,\n",
" 'index': 436,\n",
" 'word': '▁camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'el',\n",
" 'score': 0.12890373,\n",
" 'index': 437,\n",
" 'word': '▁you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'el',\n",
" 'score': 0.13222878,\n",
" 'index': 438,\n",
" 'word': '▁can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'el',\n",
" 'score': 0.10733961,\n",
" 'index': 439,\n",
" 'word': '▁discern',\n",
" 'start': 1857,\n",
" 'end': 1864},\n",
" {'entity': 'el',\n",
" 'score': 0.11150802,\n",
" 'index': 440,\n",
" 'word': '▁things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'el',\n",
" 'score': 0.12971912,\n",
" 'index': 441,\n",
" 'word': '▁in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'el',\n",
" 'score': 0.14733568,\n",
" 'index': 442,\n",
" 'word': '▁a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'el',\n",
" 'score': 0.11389056,\n",
" 'index': 443,\n",
" 'word': '▁digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'el',\n",
" 'score': 0.11318145,\n",
" 'index': 444,\n",
" 'word': '▁image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'el',\n",
" 'score': 0.13167904,\n",
" 'index': 445,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'el',\n",
" 'score': 0.11767233,\n",
" 'index': 446,\n",
" 'word': '▁3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'el',\n",
" 'score': 0.18132198,\n",
" 'index': 447,\n",
" 'word': '▁times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'el',\n",
" 'score': 0.14084792,\n",
" 'index': 448,\n",
" 'word': '▁bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'el',\n",
" 'score': 0.14768834,\n",
" 'index': 449,\n",
" 'word': '▁than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'en',\n",
" 'score': 0.14468607,\n",
" 'index': 450,\n",
" 'word': '▁the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'el',\n",
" 'score': 0.105042085,\n",
" 'index': 451,\n",
" 'word': '▁pixel',\n",
" 'start': 1916,\n",
" 'end': 1921},\n",
" {'entity': 'el',\n",
" 'score': 0.13860421,\n",
" 'index': 452,\n",
" 'word': '▁size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'el',\n",
" 'score': 0.12235864,\n",
" 'index': 453,\n",
" 'word': '▁which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'el',\n",
" 'score': 0.155492,\n",
" 'index': 454,\n",
" 'word': '▁means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'el',\n",
" 'score': 0.119502716,\n",
" 'index': 455,\n",
" 'word': '▁if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'en',\n",
" 'score': 0.14724067,\n",
" 'index': 456,\n",
" 'word': '▁there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'el',\n",
" 'score': 0.13590516,\n",
" 'index': 457,\n",
" 'word': '▁were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'el',\n",
" 'score': 0.12619598,\n",
" 'index': 458,\n",
" 'word': '▁any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'el',\n",
" 'score': 0.11781136,\n",
" 'index': 459,\n",
" 'word': '▁sign',\n",
" 'start': 1957,\n",
" 'end': 1961},\n",
" {'entity': 'en',\n",
" 'score': 0.14715949,\n",
" 'index': 460,\n",
" 'word': 's',\n",
" 'start': 1961,\n",
" 'end': 1962},\n",
" {'entity': 'en',\n",
" 'score': 0.1466556,\n",
" 'index': 461,\n",
" 'word': '▁of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'el',\n",
" 'score': 0.123666875,\n",
" 'index': 462,\n",
" 'word': '▁life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'el',\n",
" 'score': 0.12355319,\n",
" 'index': 463,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'el',\n",
" 'score': 0.14050364,\n",
" 'index': 464,\n",
" 'word': '▁you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'el',\n",
" 'score': 0.13710876,\n",
" 'index': 465,\n",
" 'word': '▁could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'el',\n",
" 'score': 0.14485125,\n",
" 'index': 466,\n",
" 'word': '▁easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'el',\n",
" 'score': 0.13146858,\n",
" 'index': 467,\n",
" 'word': '▁see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'el',\n",
" 'score': 0.13325945,\n",
" 'index': 468,\n",
" 'word': '▁what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'el',\n",
" 'score': 0.14085865,\n",
" 'index': 469,\n",
" 'word': '▁they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'el',\n",
" 'score': 0.13667713,\n",
" 'index': 470,\n",
" 'word': '▁were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'en',\n",
" 'score': 0.13597657,\n",
" 'index': 471,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'el',\n",
" 'score': 0.109606914,\n",
" 'index': 472,\n",
" 'word': '▁What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'en',\n",
" 'score': 0.14616685,\n",
" 'index': 473,\n",
" 'word': '▁the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'el',\n",
" 'score': 0.10915576,\n",
" 'index': 474,\n",
" 'word': '▁picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'el',\n",
" 'score': 0.12212968,\n",
" 'index': 475,\n",
" 'word': '▁showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'el',\n",
" 'score': 0.11899673,\n",
" 'index': 476,\n",
" 'word': '▁was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'el',\n",
" 'score': 0.12273235,\n",
" 'index': 477,\n",
" 'word': '▁the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'el',\n",
" 'score': 0.08423174,\n",
" 'index': 478,\n",
" 'word': '▁but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'el',\n",
" 'score': 0.098441444,\n",
" 'index': 479,\n",
" 'word': 'te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'ar',\n",
" 'score': 0.08634294,\n",
" 'index': 480,\n",
" 'word': '▁or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'el',\n",
" 'score': 0.084831,\n",
" 'index': 481,\n",
" 'word': '▁mesa',\n",
" 'start': 2050,\n",
" 'end': 2054},\n",
" {'entity': 'el',\n",
" 'score': 0.115126304,\n",
" 'index': 482,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'el',\n",
" 'score': 0.12282577,\n",
" 'index': 483,\n",
" 'word': '▁which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'el',\n",
" 'score': 0.11013289,\n",
" 'index': 484,\n",
" 'word': '▁are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'el',\n",
" 'score': 0.10554905,\n",
" 'index': 485,\n",
" 'word': '▁land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'el',\n",
" 'score': 0.10868882,\n",
" 'index': 486,\n",
" 'word': 'form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'el',\n",
" 'score': 0.118700296,\n",
" 'index': 487,\n",
" 'word': 's',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'el',\n",
" 'score': 0.093575306,\n",
" 'index': 488,\n",
" 'word': '▁common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'el',\n",
" 'score': 0.11244102,\n",
" 'index': 489,\n",
" 'word': '▁around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'en',\n",
" 'score': 0.14871824,\n",
" 'index': 490,\n",
" 'word': '▁the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'el',\n",
" 'score': 0.11480587,\n",
" 'index': 491,\n",
" 'word': '▁American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'el',\n",
" 'score': 0.10203579,\n",
" 'index': 492,\n",
" 'word': '▁West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'en',\n",
" 'score': 0.15275316,\n",
" 'index': 493,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"papluca/xlm-roberta-base-language-detection\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"papluca/xlm-roberta-base-language-detection\")\n",
"nlp = pipeline(\"token-classification\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results\n"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "f76c3b4a-b798-4518-bae7-978c0420fbc3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"ar 8\n",
"el 305\n",
"en 178\n",
"ja 1\n",
"pl 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" entity | \n",
" score | \n",
"
\n",
" \n",
" entity | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" ar | \n",
" 8 | \n",
" 0.084216 | \n",
"
\n",
" \n",
" el | \n",
" 305 | \n",
" 0.120293 | \n",
"
\n",
" \n",
" en | \n",
" 178 | \n",
" 0.143087 | \n",
"
\n",
" \n",
" ja | \n",
" 1 | \n",
" 0.077624 | \n",
"
\n",
" \n",
" pl | \n",
" 1 | \n",
" 0.075424 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" entity score\n",
"entity \n",
"ar 8 0.084216\n",
"el 305 0.120293\n",
"en 178 0.143087\n",
"ja 1 0.077624\n",
"pl 1 0.075424"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"with open(\"49 paplucaxlm-roberta-base-language-detection.json\", encoding='utf-8') as f:\n",
" data = json.load(f)\n",
"aux=obtener_dataframe(data)\n",
"aux\n",
"print(aux.groupby(['entity']).size())\n",
"aux.groupby(['entity', 'word']).size()\n",
"\n",
"aux.groupby(['entity']) \\\n",
" .agg({'entity':'size', 'score':'mean'}) \\\n",
" #.rename(columns={'text':'count','sent':'mean_sent'}) \\\n",
" #.reset_index()"
]
},
{
"cell_type": "markdown",
"id": "148d03c8-bed4-4325-8f59-1fe8438a5615",
"metadata": {},
"source": [
"## 50 mbruton/spa_en_mBERT"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "87a8abbb-47a5-4f90-8655-d3dd9f8ade18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'r0:arg1|tem',\n",
" 'score': 0.29541913,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'r0:root',\n",
" 'score': 0.4976404,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'r0:root',\n",
" 'score': 0.9618234,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'r1:arg1|tem',\n",
" 'score': 0.7678349,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'r1:root',\n",
" 'score': 0.6855395,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'r1:arg2|atr',\n",
" 'score': 0.93614143,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'r2:arg2|ben',\n",
" 'score': 0.5274996,\n",
" 'index': 17,\n",
" 'word': 'me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'r2:arg1|pat',\n",
" 'score': 0.36818993,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'r4:arg1|tem',\n",
" 'score': 0.4968183,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'r3:root',\n",
" 'score': 0.5518882,\n",
" 'index': 30,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'r4:arg2|atr',\n",
" 'score': 0.7494934,\n",
" 'index': 31,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'r6:arg1|tem',\n",
" 'score': 0.108391255,\n",
" 'index': 33,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'r6:root',\n",
" 'score': 0.52937764,\n",
" 'index': 34,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'r5:arg1|tem',\n",
" 'score': 0.44316128,\n",
" 'index': 35,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'r7:arg1|tem',\n",
" 'score': 0.27223623,\n",
" 'index': 42,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.75835687,\n",
" 'index': 44,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'r7:arg0|agt',\n",
" 'score': 0.40680537,\n",
" 'index': 45,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.17703968,\n",
" 'index': 60,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'r7:arg0|agt',\n",
" 'score': 0.26997983,\n",
" 'index': 62,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.22341944,\n",
" 'index': 64,\n",
" 'word': 'ci',\n",
" 'start': 264,\n",
" 'end': 266},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.18148886,\n",
" 'index': 65,\n",
" 'word': '##rc',\n",
" 'start': 266,\n",
" 'end': 268},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.21423559,\n",
" 'index': 66,\n",
" 'word': '##ling',\n",
" 'start': 268,\n",
" 'end': 272},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.16891205,\n",
" 'index': 68,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.1664749,\n",
" 'index': 71,\n",
" 'word': '##pping',\n",
" 'start': 288,\n",
" 'end': 293},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.09886724,\n",
" 'index': 72,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.16721433,\n",
" 'index': 75,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.13445386,\n",
" 'index': 76,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.07838027,\n",
" 'index': 81,\n",
" 'word': 'like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.063725084,\n",
" 'index': 82,\n",
" 'word': '##ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.041758526,\n",
" 'index': 89,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.058230672,\n",
" 'index': 90,\n",
" 'word': 'figure',\n",
" 'start': 370,\n",
" 'end': 376},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.06219335,\n",
" 'index': 91,\n",
" 'word': '##d',\n",
" 'start': 376,\n",
" 'end': 377},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.035768963,\n",
" 'index': 94,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04963572,\n",
" 'index': 95,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.03322343,\n",
" 'index': 103,\n",
" 'word': 'around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.038057458,\n",
" 'index': 110,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0465628,\n",
" 'index': 111,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.032235544,\n",
" 'index': 112,\n",
" 'word': 'sh',\n",
" 'start': 462,\n",
" 'end': 464},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0322965,\n",
" 'index': 113,\n",
" 'word': '##adow',\n",
" 'start': 464,\n",
" 'end': 468},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.034283824,\n",
" 'index': 114,\n",
" 'word': '##s',\n",
" 'start': 468,\n",
" 'end': 469},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.030779108,\n",
" 'index': 115,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.046148807,\n",
" 'index': 116,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035636976,\n",
" 'index': 117,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.043233275,\n",
" 'index': 118,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.031466834,\n",
" 'index': 119,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.02962308,\n",
" 'index': 130,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03362092,\n",
" 'index': 132,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04920107,\n",
" 'index': 133,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035394482,\n",
" 'index': 135,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.032336798,\n",
" 'index': 136,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.045922153,\n",
" 'index': 139,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035783116,\n",
" 'index': 142,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05480558,\n",
" 'index': 143,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.030482573,\n",
" 'index': 144,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03615033,\n",
" 'index': 148,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05102072,\n",
" 'index': 149,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.032937527,\n",
" 'index': 152,\n",
" 'word': 'rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03535375,\n",
" 'index': 153,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.047777873,\n",
" 'index': 154,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.0540176,\n",
" 'index': 155,\n",
" 'word': 'just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.06253041,\n",
" 'index': 156,\n",
" 'word': 'res',\n",
" 'start': 642,\n",
" 'end': 645},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.060185175,\n",
" 'index': 157,\n",
" 'word': '##emble',\n",
" 'start': 645,\n",
" 'end': 650},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.07396809,\n",
" 'index': 158,\n",
" 'word': '##d',\n",
" 'start': 650,\n",
" 'end': 651},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03149155,\n",
" 'index': 161,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.06597179,\n",
" 'index': 170,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.031629615,\n",
" 'index': 171,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.060809337,\n",
" 'index': 176,\n",
" 'word': 'We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.07441239,\n",
" 'index': 178,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.054461885,\n",
" 'index': 179,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.05075702,\n",
" 'index': 181,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.052541845,\n",
" 'index': 182,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.057083007,\n",
" 'index': 183,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05793729,\n",
" 'index': 185,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.042124726,\n",
" 'index': 188,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.06260392,\n",
" 'index': 190,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.04784509,\n",
" 'index': 192,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.055024274,\n",
" 'index': 200,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05680457,\n",
" 'index': 201,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.054766916,\n",
" 'index': 202,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.03638437,\n",
" 'index': 203,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.02923426,\n",
" 'index': 204,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.03866812,\n",
" 'index': 209,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04308056,\n",
" 'index': 210,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.04369669,\n",
" 'index': 213,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.024818424,\n",
" 'index': 216,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05038194,\n",
" 'index': 217,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04120699,\n",
" 'index': 220,\n",
" 'word': 'i',\n",
" 'start': 898,\n",
" 'end': 899},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05299865,\n",
" 'index': 223,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.030761532,\n",
" 'index': 224,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.082203425,\n",
" 'index': 227,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.024252772,\n",
" 'index': 228,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'r7:argM|tmp',\n",
" 'score': 0.08839744,\n",
" 'index': 247,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'r7:arg0|agt',\n",
" 'score': 0.252022,\n",
" 'index': 252,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'r8:root',\n",
" 'score': 0.31143436,\n",
" 'index': 253,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.17507401,\n",
" 'index': 256,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'r7:arg1|tem',\n",
" 'score': 0.18424971,\n",
" 'index': 257,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'r8:root',\n",
" 'score': 0.26880854,\n",
" 'index': 258,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.124720916,\n",
" 'index': 259,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'r7:arg0|agt',\n",
" 'score': 0.44198242,\n",
" 'index': 268,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.71973616,\n",
" 'index': 269,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'r7:root',\n",
" 'score': 0.65194285,\n",
" 'index': 271,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'r7:arg1|pat',\n",
" 'score': 0.10310135,\n",
" 'index': 272,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'r5:arg0|agt',\n",
" 'score': 0.37039754,\n",
" 'index': 278,\n",
" 'word': 'defender',\n",
" 'start': 1152,\n",
" 'end': 1160},\n",
" {'entity': 'r4:arg0|agt',\n",
" 'score': 0.1837553,\n",
" 'index': 279,\n",
" 'word': '##s',\n",
" 'start': 1160,\n",
" 'end': 1161},\n",
" {'entity': 'r4:arg2|atr',\n",
" 'score': 0.1392899,\n",
" 'index': 285,\n",
" 'word': 'there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'r5:root',\n",
" 'score': 0.16138868,\n",
" 'index': 286,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'r4:arg1|tem',\n",
" 'score': 0.25208256,\n",
" 'index': 288,\n",
" 'word': 'civili',\n",
" 'start': 1204,\n",
" 'end': 1210},\n",
" {'entity': 'r5:arg1|tem',\n",
" 'score': 0.2960885,\n",
" 'index': 289,\n",
" 'word': '##zation',\n",
" 'start': 1210,\n",
" 'end': 1216},\n",
" {'entity': 'r8:arg0|agt',\n",
" 'score': 0.08236764,\n",
" 'index': 293,\n",
" 'word': 'We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.085176714,\n",
" 'index': 294,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.09762525,\n",
" 'index': 296,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'r8:arg1|pat',\n",
" 'score': 0.038402967,\n",
" 'index': 298,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'r9:root',\n",
" 'score': 0.07043696,\n",
" 'index': 301,\n",
" 'word': 'make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.034738,\n",
" 'index': 302,\n",
" 'word': 'sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.03654291,\n",
" 'index': 303,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.054115873,\n",
" 'index': 304,\n",
" 'word': 'were',\n",
" 'start': 1279,\n",
" 'end': 1283},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.044537026,\n",
" 'index': 305,\n",
" 'word': '##n',\n",
" 'start': 1283,\n",
" 'end': 1284},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.043383468,\n",
" 'index': 307,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.050688915,\n",
" 'index': 308,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.030045124,\n",
" 'index': 310,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.031087063,\n",
" 'index': 316,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.037183713,\n",
" 'index': 317,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.037349515,\n",
" 'index': 318,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.038758732,\n",
" 'index': 325,\n",
" 'word': 'team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05292711,\n",
" 'index': 326,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.031445753,\n",
" 'index': 328,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.039939146,\n",
" 'index': 329,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05810041,\n",
" 'index': 330,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.04002515,\n",
" 'index': 333,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03314902,\n",
" 'index': 334,\n",
" 'word': '##er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04910727,\n",
" 'index': 341,\n",
" 'word': 'reveal',\n",
" 'start': 1434,\n",
" 'end': 1440},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.05482279,\n",
" 'index': 342,\n",
" 'word': '##ing',\n",
" 'start': 1440,\n",
" 'end': 1443},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.033936784,\n",
" 'index': 345,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.034768764,\n",
" 'index': 348,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.049932696,\n",
" 'index': 349,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035454,\n",
" 'index': 352,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.033968676,\n",
" 'index': 357,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.053387593,\n",
" 'index': 358,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.041064143,\n",
" 'index': 360,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.046866965,\n",
" 'index': 362,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.036013737,\n",
" 'index': 363,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.046605524,\n",
" 'index': 366,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.051832046,\n",
" 'index': 368,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03425833,\n",
" 'index': 369,\n",
" 'word': 'alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.036131877,\n",
" 'index': 370,\n",
" 'word': 'marking',\n",
" 'start': 1562,\n",
" 'end': 1569},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.040733457,\n",
" 'index': 371,\n",
" 'word': '##s',\n",
" 'start': 1569,\n",
" 'end': 1570},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.056199037,\n",
" 'index': 372,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04729384,\n",
" 'index': 373,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.02660689,\n",
" 'index': 374,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0323595,\n",
" 'index': 381,\n",
" 'word': 'ye',\n",
" 'start': 1601,\n",
" 'end': 1603},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.037608866,\n",
" 'index': 384,\n",
" 'word': 'rum',\n",
" 'start': 1610,\n",
" 'end': 1613},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.042922124,\n",
" 'index': 385,\n",
" 'word': '##or',\n",
" 'start': 1613,\n",
" 'end': 1615},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04341532,\n",
" 'index': 386,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.054916393,\n",
" 'index': 390,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.03691343,\n",
" 'index': 391,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'r8:arg2|atr',\n",
" 'score': 0.030148128,\n",
" 'index': 392,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'r8:argM|adv',\n",
" 'score': 0.028092569,\n",
" 'index': 393,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.039598208,\n",
" 'index': 398,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.051345967,\n",
" 'index': 399,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.049982574,\n",
" 'index': 401,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03342623,\n",
" 'index': 403,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.044696685,\n",
" 'index': 405,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03436369,\n",
" 'index': 406,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.03859569,\n",
" 'index': 407,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0494223,\n",
" 'index': 408,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.02929864,\n",
" 'index': 412,\n",
" 'word': 'summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.029290752,\n",
" 'index': 413,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.035252254,\n",
" 'index': 415,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'r8:arg1|tem',\n",
" 'score': 0.036368903,\n",
" 'index': 416,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03298566,\n",
" 'index': 419,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.047498554,\n",
" 'index': 420,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03272342,\n",
" 'index': 424,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03630522,\n",
" 'index': 425,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.030974206,\n",
" 'index': 434,\n",
" 'word': 'With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0339595,\n",
" 'index': 437,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.040549394,\n",
" 'index': 438,\n",
" 'word': 'can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.042945094,\n",
" 'index': 439,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04496943,\n",
" 'index': 440,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.033708826,\n",
" 'index': 441,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03150944,\n",
" 'index': 442,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.033492185,\n",
" 'index': 455,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.044837367,\n",
" 'index': 456,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03930233,\n",
" 'index': 458,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.046380974,\n",
" 'index': 459,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03521819,\n",
" 'index': 461,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03437573,\n",
" 'index': 465,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.040590625,\n",
" 'index': 466,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.0304144,\n",
" 'index': 467,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.048064638,\n",
" 'index': 468,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.032679703,\n",
" 'index': 469,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.037320852,\n",
" 'index': 470,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.047912642,\n",
" 'index': 471,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.034035392,\n",
" 'index': 473,\n",
" 'word': 'What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.036057852,\n",
" 'index': 475,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.047634307,\n",
" 'index': 476,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04636057,\n",
" 'index': 477,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03571351,\n",
" 'index': 479,\n",
" 'word': 'but',\n",
" 'start': 2041,\n",
" 'end': 2044},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035763755,\n",
" 'index': 480,\n",
" 'word': '##te',\n",
" 'start': 2044,\n",
" 'end': 2046},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03430853,\n",
" 'index': 484,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.04987236,\n",
" 'index': 485,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03454835,\n",
" 'index': 486,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.035414834,\n",
" 'index': 487,\n",
" 'word': '##form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.038371794,\n",
" 'index': 488,\n",
" 'word': '##s',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.03423538,\n",
" 'index': 489,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'r10:root',\n",
" 'score': 0.032777835,\n",
" 'index': 490,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089}]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Build a token-classification pipeline for mbruton/spa_en_mBERT and run it on `text`.\n",
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_en_mBERT\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_en_mBERT\")\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is not assigned in this cell; it must already exist in the kernel.\n",
"ner_results = nlp(text)\n",
"ner_results\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "b46505fd-da7c-40a5-b666-29e67291ffc7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"r0:arg1|tem 1\n",
"r0:root 2\n",
"r10:root 113\n",
"r1:arg1|tem 1\n",
"r1:arg2|atr 1\n",
"r1:root 1\n",
"r2:arg1|pat 1\n",
"r2:arg2|ben 1\n",
"r3:root 1\n",
"r4:arg0|agt 1\n",
"r4:arg1|tem 2\n",
"r4:arg2|atr 2\n",
"r5:arg0|agt 1\n",
"r5:arg1|tem 2\n",
"r5:root 1\n",
"r6:arg1|tem 1\n",
"r6:root 1\n",
"r7:arg0|agt 4\n",
"r7:arg1|pat 2\n",
"r7:arg1|tem 2\n",
"r7:argM|tmp 1\n",
"r7:root 3\n",
"r8:arg0|agt 3\n",
"r8:arg1|pat 3\n",
"r8:arg1|tem 29\n",
"r8:arg2|atr 7\n",
"r8:argM|adv 7\n",
"r8:root 2\n",
"r9:root 9\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"r0:arg1|tem you 1\n",
"r0:root 1\n",
" re 1\n",
"r10:root ##adow 1\n",
" ##ct 1\n",
" ##d 2\n",
" ##emble 1\n",
" ##er 1\n",
" ##ern 1\n",
" ##form 1\n",
" ##ing 1\n",
" ##n 2\n",
" ##rra 1\n",
" ##s 3\n",
" ##te 1\n",
" Mali 1\n",
" What 1\n",
" With 1\n",
" alien 1\n",
" announced 1\n",
" are 1\n",
" around 1\n",
" at 1\n",
" be 1\n",
" became 1\n",
" but 1\n",
" by 1\n",
" can 1\n",
" captured 1\n",
" common 1\n",
" could 1\n",
" day 1\n",
" decided 1\n",
" did 1\n",
" disc 1\n",
" easily 1\n",
" engage 1\n",
" figure 1\n",
" for 1\n",
" formation 1\n",
" formed 1\n",
" had 1\n",
" head 1\n",
" hidden 1\n",
" image 1\n",
" in 2\n",
" it 2\n",
" land 2\n",
" later 1\n",
" like 1\n",
" look 1\n",
" made 2\n",
" making 1\n",
" marking 1\n",
" mean 1\n",
" means 1\n",
" meant 1\n",
" monument 1\n",
" photo 1\n",
" picture 3\n",
" prove 1\n",
" res 1\n",
" reveal 1\n",
" revealed 1\n",
" rock 1\n",
" see 2\n",
" sh 1\n",
" shot 1\n",
" showed 1\n",
" signs 1\n",
" started 1\n",
" summer 1\n",
" sure 3\n",
" t 2\n",
" take 1\n",
" team 2\n",
" there 1\n",
" they 1\n",
" things 1\n",
" thought 1\n",
" to 1\n",
" took 1\n",
" using 1\n",
" was 5\n",
" wasn 1\n",
" we 2\n",
" were 4\n",
" what 1\n",
" which 2\n",
" ye 1\n",
" you 2\n",
"r1:arg1|tem you 1\n",
"r1:arg2|atr able 1\n",
"r1:root be 1\n",
"r2:arg1|pat story 1\n",
"r2:arg2|ben me 1\n",
"r3:root is 1\n",
"r4:arg0|agt ##s 1\n",
"r4:arg1|tem civili 1\n",
" which 1\n",
"r4:arg2|atr evidence 1\n",
" there 1\n",
"r5:arg0|agt defender 1\n",
"r5:arg1|tem ##zation 1\n",
" life 1\n",
"r5:root was 1\n",
"r6:arg1|tem there 1\n",
"r6:root is 1\n",
"r7:arg0|agt by 1\n",
" people 1\n",
" scientists 1\n",
" spacecraft 1\n",
"r7:arg1|pat it 1\n",
" planet 1\n",
"r7:arg1|tem ##form 1\n",
" face 1\n",
"r7:argM|tmp for 1\n",
"r7:root created 1\n",
" hide 1\n",
" wanted 1\n",
"r8:arg0|agt Viking 1\n",
" We 1\n",
" it 1\n",
"r8:arg1|pat like 1\n",
" photos 1\n",
" shot 1\n",
"r8:arg1|tem ##n 1\n",
" ##ness 1\n",
" ##or 1\n",
" Mali 1\n",
" Michael 1\n",
" We 1\n",
" attention 1\n",
" by 1\n",
" face 1\n",
" it 5\n",
" land 1\n",
" one 1\n",
" picture 1\n",
" public 1\n",
" rum 1\n",
" scientists 1\n",
" that 3\n",
" them 1\n",
" we 3\n",
" which 2\n",
"r8:arg2|atr clear 1\n",
" evidence 1\n",
" i 1\n",
" sharp 1\n",
" way 1\n",
" wrong 2\n",
"r8:argM|adv around 1\n",
" at 1\n",
" in 1\n",
" just 1\n",
" on 2\n",
" soon 1\n",
"r8:root thought 1\n",
" was 1\n",
"r9:root ##ling 1\n",
" ##pping 1\n",
" ##rc 1\n",
" appeared 1\n",
" ci 1\n",
" decided 1\n",
" make 1\n",
" spotted 1\n",
" take 1\n",
"dtype: int64"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Load the JSON file named after this model and turn it into a table.\n",
"# NOTE(review): `json` and `obtener_dataframe` are not defined in this cell;\n",
"# presumably an earlier cell provides them - confirm before Restart & Run All.\n",
"with open(\"50 mbrutonspa_en_mBERT.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Printed to stdout: row count per 'entity' value.\n",
"print(aux.groupby(['entity']).size())\n",
"# Shown as the cell result: row count per ('entity', 'word') pair.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "9bd22ec6-6192-4ff8-b482-b7fdbfac312c",
"metadata": {},
"source": [
"## 51 BSC-LT/roberta_model_for_anonimization"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "6cd4a769-e99c-453f-83c6-447c4c69ec1b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'entity': 'S-ORG',\n",
" 'score': 0.9931144,\n",
" 'index': 10,\n",
" 'word': 'ĠNASA',\n",
" 'start': 38,\n",
" 'end': 42},\n",
" {'entity': 'S-OTH',\n",
" 'score': 0.9904603,\n",
" 'index': 21,\n",
" 'word': 'ĠCara',\n",
" 'start': 94,\n",
" 'end': 98},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.9779458,\n",
" 'index': 23,\n",
" 'word': 'ĠMarte',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.9959857,\n",
" 'index': 34,\n",
" 'word': 'ĠMarte',\n",
" 'start': 156,\n",
" 'end': 161},\n",
" {'entity': 'B-OTH',\n",
" 'score': 0.9917984,\n",
" 'index': 57,\n",
" 'word': 'ĠVik',\n",
" 'start': 269,\n",
" 'end': 272},\n",
" {'entity': 'I-OTH',\n",
" 'score': 0.9757009,\n",
" 'index': 58,\n",
" 'word': 'ing',\n",
" 'start': 272,\n",
" 'end': 275},\n",
" {'entity': 'E-OTH',\n",
" 'score': 0.9817541,\n",
" 'index': 59,\n",
" 'word': 'Ġ1',\n",
" 'start': 276,\n",
" 'end': 277},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.99622846,\n",
" 'index': 96,\n",
" 'word': 'ĠC',\n",
" 'start': 503,\n",
" 'end': 504},\n",
" {'entity': 'S-ORG',\n",
" 'score': 0.990908,\n",
" 'index': 182,\n",
" 'word': 'ĠNASA',\n",
" 'start': 919,\n",
" 'end': 923},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.9959258,\n",
" 'index': 188,\n",
" 'word': 'ĠMarte',\n",
" 'start': 947,\n",
" 'end': 952},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.99050736,\n",
" 'index': 199,\n",
" 'word': 'ĠMarte',\n",
" 'start': 979,\n",
" 'end': 984},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.9929755,\n",
" 'index': 255,\n",
" 'word': 'ĠMarte',\n",
" 'start': 1252,\n",
" 'end': 1257},\n",
" {'entity': 'S-ORG',\n",
" 'score': 0.9937528,\n",
" 'index': 275,\n",
" 'word': 'ĠNASA',\n",
" 'start': 1357,\n",
" 'end': 1361},\n",
" {'entity': 'S-LOC',\n",
" 'score': 0.9968929,\n",
" 'index': 284,\n",
" 'word': 'ĠMarte',\n",
" 'start': 1412,\n",
" 'end': 1417},\n",
" {'entity': 'B-PER',\n",
" 'score': 0.99851614,\n",
" 'index': 308,\n",
" 'word': 'ĠMichael',\n",
" 'start': 1515,\n",
" 'end': 1522},\n",
" {'entity': 'E-PER',\n",
" 'score': 0.9982216,\n",
" 'index': 309,\n",
" 'word': 'ĠMal',\n",
" 'start': 1523,\n",
" 'end': 1526},\n",
" {'entity': 'E-PER',\n",
" 'score': 0.9815487,\n",
" 'index': 310,\n",
" 'word': 'in',\n",
" 'start': 1526,\n",
" 'end': 1528},\n",
" {'entity': 'B-ORG',\n",
" 'score': 0.79048795,\n",
" 'index': 317,\n",
" 'word': 'ĠMars',\n",
" 'start': 1554,\n",
" 'end': 1558},\n",
" {'entity': 'E-ORG',\n",
" 'score': 0.807663,\n",
" 'index': 318,\n",
" 'word': 'ĠOr',\n",
" 'start': 1559,\n",
" 'end': 1561},\n",
" {'entity': 'E-ORG',\n",
" 'score': 0.6876012,\n",
" 'index': 319,\n",
" 'word': 'bi',\n",
" 'start': 1561,\n",
" 'end': 1563},\n",
" {'entity': 'E-ORG',\n",
" 'score': 0.60779643,\n",
" 'index': 320,\n",
" 'word': 'ter',\n",
" 'start': 1563,\n",
" 'end': 1566},\n",
" {'entity': 'S-OTH',\n",
" 'score': 0.9860491,\n",
" 'index': 336,\n",
" 'word': 'ĠVik',\n",
" 'start': 1655,\n",
" 'end': 1658},\n",
" {'entity': 'S-PER',\n",
" 'score': 0.9582744,\n",
" 'index': 427,\n",
" 'word': 'ĠMal',\n",
" 'start': 2082,\n",
" 'end': 2085}]"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text=\"\"\"Entonces, si eres un científico de la NASA, deberías poder contarme toda la historia sobre la Cara en Marte, que obviamente es evidencia de que hay vida en Marte y que la cara fue creada por extraterrestres, ¿correcto?\" No. Hace veinticinco años, nuestra nave espacial Viking 1 estaba dando vueltas alrededor del planeta, tomando fotografías, cuando detectó la sombra de un rostro humano. Los científicos estadounidenses descubrieron que se trataba simplemente de otra mesa marciana, común alrededor de Cydonia, solo que ésta tenía sombras. Eso lo hizo parecer un faraón egipcio. Muy pocos días después, revelamos la imagen para que todos la vieran, y nos aseguramos de notar que era una enorme formación rocosa que simplemente se parecía a una cabeza y un rostro humanos, pero todo era. formado por sombras Sólo lo anunciamos porque pensamos que sería una buena manera de involucrar al público con los hallazgos de la NASA y atraer la atención a Marte, y así fue.\n",
"\n",
"El rostro de Marte pronto se convirtió en un ícono pop; filmada en películas, apareció en libros, revistas, programas de radio y en las colas de las cajas de las tiendas de comestibles durante 25 años. Algunas personas pensaron que la forma natural del relieve era evidencia de vida en Marte, y que los científicos queríamos ocultarla, pero en realidad, los defensores del presupuesto de la NASA desearían que hubiera una civilización antigua en Marte. Decidimos tomar otra foto solo para asegurarnos de no estar equivocados, el 5 de abril de 1998. Michael Malin y su equipo de cámara de Mars Orbiter tomaron una fotografía que era diez veces más nítida que las fotografías originales del Viking, revelando una forma de relieve natural, que No significaba ningún monumento alienígena. \"Pero esa imagen no era muy clara en absoluto, lo que podría significar que las marcas alienígenas estaban ocultas por la neblina\" Bueno, no, sí, ese rumor comenzó, pero para demostrar que estaban equivocados, el 8 de abril de 2001 decidimos tomar otra fotografía, asegurándonos de que Era un día de verano sin nubes. El equipo de Malin capturó una fotografía asombrosa utilizando la revolución máxima absoluta de la cámara. Con esta cámara puedes discernir cosas en una imagen digital, 3 veces más grande que el tamaño de un píxel, lo que significa que si hubiera señales de vida, podrías ver fácilmente cuáles eran. Lo que la imagen mostraba era la colina o mesa, que son accidentes geográficos comunes en el oeste americano.\"\"\"\n",
"\n",
"# Load BSC-LT/roberta_model_for_anonimization and tag the `text` assigned above.\n",
"model = AutoModelForTokenClassification.from_pretrained(\"BSC-LT/roberta_model_for_anonimization\")\n",
"tokenizer = AutoTokenizer.from_pretrained(\"BSC-LT/roberta_model_for_anonimization\")\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "code",
"execution_count": 133,
"id": "b46cee5f-5626-4ef8-b5de-8c57f5a142a8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entity\n",
"B-ORG 1\n",
"B-OTH 1\n",
"B-PER 1\n",
"E-ORG 3\n",
"E-OTH 1\n",
"E-PER 2\n",
"I-OTH 1\n",
"S-LOC 7\n",
"S-ORG 3\n",
"S-OTH 2\n",
"S-PER 1\n",
"dtype: int64\n"
]
},
{
"data": {
"text/plain": [
"entity word \n",
"B-ORG ĠMars 1\n",
"B-OTH ĠVik 1\n",
"B-PER ĠMichael 1\n",
"E-ORG bi 1\n",
" ter 1\n",
" ĠOr 1\n",
"E-OTH Ġ1 1\n",
"E-PER in 1\n",
" ĠMal 1\n",
"I-OTH ing 1\n",
"S-LOC ĠC 1\n",
" ĠMarte 6\n",
"S-ORG ĠNASA 3\n",
"S-OTH ĠCara 1\n",
" ĠVik 1\n",
"S-PER ĠMal 1\n",
"dtype: int64"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Load the JSON file named after this model and turn it into a table.\n",
"# NOTE(review): `json` and `obtener_dataframe` are not defined in this cell;\n",
"# presumably an earlier cell provides them - confirm before Restart & Run All.\n",
"with open(\"51 BSC-LTroberta_model_for_anonimization.json\", encoding='utf-8') as f:\n",
"    data = json.load(f)\n",
"aux = obtener_dataframe(data)\n",
"\n",
"# Printed to stdout: row count per 'entity' value.\n",
"print(aux.groupby(['entity']).size())\n",
"# Shown as the cell result: row count per ('entity', 'word') pair.\n",
"aux.groupby(['entity', 'word']).size()"
]
},
{
"cell_type": "markdown",
"id": "7c1f2159-3e79-4d4b-a4dc-b690d563763d",
"metadata": {},
"source": [
"## 52 aymurai/anonymizer-beto-cased-flair"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "432508a1-4f16-494f-9ae2-6a206d2dbe2e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
},
{
"ename": "OSError",
"evalue": "aymurai/anonymizer-beto-cased-flair does not appear to have a file named config.json. Checkout 'https://huggingface.co/aymurai/anonymizer-beto-cased-flair/tree/main' for available files.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 304\u001b[0m response\u001b[38;5;241m.\u001b[39mraise_for_status()\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\requests\\models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
"\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/aymurai/anonymizer-beto-cased-flair/resolve/main/config.json",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:399\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 398\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[1;32m--> 399\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[0;32m 400\u001b[0m path_or_repo_id,\n\u001b[0;32m 401\u001b[0m filename,\n\u001b[0;32m 402\u001b[0m subfolder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(subfolder) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m subfolder,\n\u001b[0;32m 403\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 404\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 405\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 406\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 407\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 408\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 409\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 410\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 411\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 412\u001b[0m )\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1221\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[1;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[0;32m 1220\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_cache_dir(\n\u001b[0;32m 1222\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[0;32m 1223\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 1224\u001b[0m \u001b[38;5;66;03m# File info\u001b[39;00m\n\u001b[0;32m 1225\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1226\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1227\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1228\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1229\u001b[0m \u001b[38;5;66;03m# HTTP info\u001b[39;00m\n\u001b[0;32m 1230\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1231\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1232\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1233\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1234\u001b[0m \u001b[38;5;66;03m# Additional options\u001b[39;00m\n\u001b[0;32m 1235\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1236\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 1237\u001b[0m )\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1282\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[1;34m(cache_dir, repo_id, filename, repo_type, revision, headers, proxies, etag_timeout, endpoint, local_files_only, force_download)\u001b[0m\n\u001b[0;32m 1280\u001b[0m \u001b[38;5;66;03m# Try to get metadata (etag, commit_hash, url, size) from the server.\u001b[39;00m\n\u001b[0;32m 1281\u001b[0m \u001b[38;5;66;03m# If we can't, a HEAD request error is returned.\u001b[39;00m\n\u001b[1;32m-> 1282\u001b[0m (url_to_download, etag, commit_hash, expected_size, head_call_error) \u001b[38;5;241m=\u001b[39m _get_metadata_or_catch_error(\n\u001b[0;32m 1283\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1284\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1285\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1286\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1287\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1288\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1289\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1290\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1291\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1292\u001b[0m storage_folder\u001b[38;5;241m=\u001b[39mstorage_folder,\n\u001b[0;32m 1293\u001b[0m relative_filename\u001b[38;5;241m=\u001b[39mrelative_filename,\n\u001b[0;32m 1294\u001b[0m )\n\u001b[0;32m 1296\u001b[0m \u001b[38;5;66;03m# etag can be None for several reasons:\u001b[39;00m\n\u001b[0;32m 1297\u001b[0m \u001b[38;5;66;03m# 1. we passed local_files_only.\u001b[39;00m\n\u001b[0;32m 1298\u001b[0m \u001b[38;5;66;03m# 2. 
we don't have a connection\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[38;5;66;03m# If the specified revision is a commit hash, look inside \"snapshots\".\u001b[39;00m\n\u001b[0;32m 1305\u001b[0m \u001b[38;5;66;03m# If the specified revision is a branch or tag, look inside \"refs\".\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1722\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[1;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[0;32m 1721\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1722\u001b[0m metadata \u001b[38;5;241m=\u001b[39m get_hf_file_metadata(url\u001b[38;5;241m=\u001b[39murl, proxies\u001b[38;5;241m=\u001b[39mproxies, timeout\u001b[38;5;241m=\u001b[39metag_timeout, headers\u001b[38;5;241m=\u001b[39mheaders)\n\u001b[0;32m 1723\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1645\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[1;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[0;32m 1644\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[1;32m-> 1645\u001b[0m r \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 1646\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHEAD\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 1647\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 1648\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1649\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 1650\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 1651\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1652\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 1653\u001b[0m )\n\u001b[0;32m 1654\u001b[0m hf_raise_for_status(r)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:372\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[1;32m--> 372\u001b[0m response \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 373\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[0;32m 374\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 375\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 376\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams,\n\u001b[0;32m 377\u001b[0m )\n\u001b[0;32m 379\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[0;32m 380\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:396\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 395\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[1;32m--> 396\u001b[0m hf_raise_for_status(response)\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:315\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 314\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 315\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EntryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"\u001b[1;31mEntryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-6689e043-48997c467113ccac7d10140e;22177459-16a6-4735-8391-d445af8a31ee)\n\nEntry Not Found for url: https://huggingface.co/aymurai/anonymizer-beto-cased-flair/resolve/main/config.json.",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[111], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maymurai/anonymizer-beto-cased-flair\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maymurai/anonymizer-beto-cased-flair\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:934\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 931\u001b[0m trust_remote_code \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrust_remote_code\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 932\u001b[0m code_revision \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcode_revision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m--> 934\u001b[0m config_dict, unused_kwargs \u001b[38;5;241m=\u001b[39m PretrainedConfig\u001b[38;5;241m.\u001b[39mget_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 935\u001b[0m has_remote_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoConfig\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 936\u001b[0m has_local_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;129;01min\u001b[39;00m CONFIG_MAPPING\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:632\u001b[0m, in \u001b[0;36mPretrainedConfig.get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 630\u001b[0m original_kwargs \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(kwargs)\n\u001b[0;32m 631\u001b[0m \u001b[38;5;66;03m# Get config dict associated with the base config file\u001b[39;00m\n\u001b[1;32m--> 632\u001b[0m config_dict, kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict:\n\u001b[0;32m 634\u001b[0m original_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:689\u001b[0m, in \u001b[0;36mPretrainedConfig._get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 685\u001b[0m configuration_file \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_configuration_file\u001b[39m\u001b[38;5;124m\"\u001b[39m, CONFIG_NAME) \u001b[38;5;28;01mif\u001b[39;00m gguf_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m gguf_file\n\u001b[0;32m 687\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 688\u001b[0m \u001b[38;5;66;03m# Load from local folder or from cache or download from model Hub and cache\u001b[39;00m\n\u001b[1;32m--> 689\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m cached_file(\n\u001b[0;32m 690\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 691\u001b[0m configuration_file,\n\u001b[0;32m 692\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 693\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 694\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 695\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 696\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 697\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 698\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 699\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 700\u001b[0m subfolder\u001b[38;5;241m=\u001b[39msubfolder,\n\u001b[0;32m 701\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 702\u001b[0m )\n\u001b[0;32m 703\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[0;32m 704\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
\u001b[38;5;167;01mEnvironmentError\u001b[39;00m:\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to\u001b[39;00m\n\u001b[0;32m 706\u001b[0m \u001b[38;5;66;03m# the original exception.\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:453\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m revision \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 452\u001b[0m revision \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 453\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not appear to have a file named \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_filename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Checkout \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/tree/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available files.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 456\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 458\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n",
"\u001b[1;31mOSError\u001b[0m: aymurai/anonymizer-beto-cased-flair does not appear to have a file named config.json. Checkout 'https://huggingface.co/aymurai/anonymizer-beto-cased-flair/tree/main' for available files."
]
}
],
"source": [
"\n",
"\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"aymurai/anonymizer-beto-cased-flair\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"aymurai/anonymizer-beto-cased-flair\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51143fc8-92cc-4c5f-88c5-e295452c3b6a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "8b203e13-8095-4a04-b490-78677533942b",
"metadata": {},
"source": [
"## 53 google-bert/bert-large-cased-whole-word-masking"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "f03e9cdc-f9d1-4eca-b059-89e3955e22c5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-large-cased-whole-word-masking and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"data": {
"text/plain": [
"[{'entity': 'LABEL_1',\n",
" 'score': 0.7333207,\n",
" 'index': 1,\n",
" 'word': 'So',\n",
" 'start': 0,\n",
" 'end': 2},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7897263,\n",
" 'index': 2,\n",
" 'word': ',',\n",
" 'start': 2,\n",
" 'end': 3},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61105645,\n",
" 'index': 3,\n",
" 'word': 'if',\n",
" 'start': 4,\n",
" 'end': 6},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6179384,\n",
" 'index': 4,\n",
" 'word': 'you',\n",
" 'start': 7,\n",
" 'end': 10},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51219755,\n",
" 'index': 5,\n",
" 'word': \"'\",\n",
" 'start': 10,\n",
" 'end': 11},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7380822,\n",
" 'index': 6,\n",
" 'word': 're',\n",
" 'start': 11,\n",
" 'end': 13},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65777117,\n",
" 'index': 7,\n",
" 'word': 'a',\n",
" 'start': 14,\n",
" 'end': 15},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.53536695,\n",
" 'index': 8,\n",
" 'word': 'NASA',\n",
" 'start': 16,\n",
" 'end': 20},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5658676,\n",
" 'index': 9,\n",
" 'word': 'scientist',\n",
" 'start': 21,\n",
" 'end': 30},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7322473,\n",
" 'index': 10,\n",
" 'word': ',',\n",
" 'start': 30,\n",
" 'end': 31},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.63136214,\n",
" 'index': 11,\n",
" 'word': 'you',\n",
" 'start': 32,\n",
" 'end': 35},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6426238,\n",
" 'index': 12,\n",
" 'word': 'should',\n",
" 'start': 36,\n",
" 'end': 42},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5245657,\n",
" 'index': 13,\n",
" 'word': 'be',\n",
" 'start': 43,\n",
" 'end': 45},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6180293,\n",
" 'index': 14,\n",
" 'word': 'able',\n",
" 'start': 46,\n",
" 'end': 50},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5129229,\n",
" 'index': 15,\n",
" 'word': 'to',\n",
" 'start': 51,\n",
" 'end': 53},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52398765,\n",
" 'index': 16,\n",
" 'word': 'tell',\n",
" 'start': 54,\n",
" 'end': 58},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60980695,\n",
" 'index': 17,\n",
" 'word': 'me',\n",
" 'start': 59,\n",
" 'end': 61},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61398715,\n",
" 'index': 18,\n",
" 'word': 'the',\n",
" 'start': 62,\n",
" 'end': 65},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7106103,\n",
" 'index': 19,\n",
" 'word': 'whole',\n",
" 'start': 66,\n",
" 'end': 71},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6377772,\n",
" 'index': 20,\n",
" 'word': 'story',\n",
" 'start': 72,\n",
" 'end': 77},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6342355,\n",
" 'index': 21,\n",
" 'word': 'about',\n",
" 'start': 78,\n",
" 'end': 83},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6174517,\n",
" 'index': 22,\n",
" 'word': 'the',\n",
" 'start': 84,\n",
" 'end': 87},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6358006,\n",
" 'index': 23,\n",
" 'word': 'Face',\n",
" 'start': 88,\n",
" 'end': 92},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5708014,\n",
" 'index': 24,\n",
" 'word': 'On',\n",
" 'start': 93,\n",
" 'end': 95},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6247826,\n",
" 'index': 25,\n",
" 'word': 'Mars',\n",
" 'start': 96,\n",
" 'end': 100},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.79095966,\n",
" 'index': 26,\n",
" 'word': ',',\n",
" 'start': 100,\n",
" 'end': 101},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.67212445,\n",
" 'index': 27,\n",
" 'word': 'which',\n",
" 'start': 102,\n",
" 'end': 107},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7683761,\n",
" 'index': 28,\n",
" 'word': 'obviously',\n",
" 'start': 108,\n",
" 'end': 117},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5656431,\n",
" 'index': 29,\n",
" 'word': 'is',\n",
" 'start': 118,\n",
" 'end': 120},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5710077,\n",
" 'index': 30,\n",
" 'word': 'evidence',\n",
" 'start': 121,\n",
" 'end': 129},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6670097,\n",
" 'index': 31,\n",
" 'word': 'that',\n",
" 'start': 130,\n",
" 'end': 134},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6908868,\n",
" 'index': 32,\n",
" 'word': 'there',\n",
" 'start': 135,\n",
" 'end': 140},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59239066,\n",
" 'index': 33,\n",
" 'word': 'is',\n",
" 'start': 141,\n",
" 'end': 143},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7072231,\n",
" 'index': 34,\n",
" 'word': 'life',\n",
" 'start': 144,\n",
" 'end': 148},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64615613,\n",
" 'index': 35,\n",
" 'word': 'on',\n",
" 'start': 149,\n",
" 'end': 151},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.60538524,\n",
" 'index': 36,\n",
" 'word': 'Mars',\n",
" 'start': 152,\n",
" 'end': 156},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.83394796,\n",
" 'index': 37,\n",
" 'word': ',',\n",
" 'start': 156,\n",
" 'end': 157},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6373182,\n",
" 'index': 38,\n",
" 'word': 'and',\n",
" 'start': 158,\n",
" 'end': 161},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.732228,\n",
" 'index': 39,\n",
" 'word': 'that',\n",
" 'start': 162,\n",
" 'end': 166},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.55135024,\n",
" 'index': 40,\n",
" 'word': 'the',\n",
" 'start': 167,\n",
" 'end': 170},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5254089,\n",
" 'index': 41,\n",
" 'word': 'face',\n",
" 'start': 171,\n",
" 'end': 175},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61335075,\n",
" 'index': 42,\n",
" 'word': 'was',\n",
" 'start': 176,\n",
" 'end': 179},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74078745,\n",
" 'index': 43,\n",
" 'word': 'created',\n",
" 'start': 180,\n",
" 'end': 187},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7517857,\n",
" 'index': 44,\n",
" 'word': 'by',\n",
" 'start': 188,\n",
" 'end': 190},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.510251,\n",
" 'index': 45,\n",
" 'word': 'aliens',\n",
" 'start': 191,\n",
" 'end': 197},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7680581,\n",
" 'index': 46,\n",
" 'word': ',',\n",
" 'start': 197,\n",
" 'end': 198},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5634767,\n",
" 'index': 47,\n",
" 'word': 'correct',\n",
" 'start': 199,\n",
" 'end': 206},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66794556,\n",
" 'index': 48,\n",
" 'word': '?',\n",
" 'start': 206,\n",
" 'end': 207},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6206324,\n",
" 'index': 49,\n",
" 'word': '\"',\n",
" 'start': 207,\n",
" 'end': 208},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69091505,\n",
" 'index': 50,\n",
" 'word': 'No',\n",
" 'start': 209,\n",
" 'end': 211},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.82622874,\n",
" 'index': 51,\n",
" 'word': ',',\n",
" 'start': 211,\n",
" 'end': 212},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6986776,\n",
" 'index': 52,\n",
" 'word': 'twenty',\n",
" 'start': 213,\n",
" 'end': 219},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.587492,\n",
" 'index': 53,\n",
" 'word': 'five',\n",
" 'start': 220,\n",
" 'end': 224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5896044,\n",
" 'index': 54,\n",
" 'word': 'years',\n",
" 'start': 225,\n",
" 'end': 230},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5274402,\n",
" 'index': 55,\n",
" 'word': 'ago',\n",
" 'start': 231,\n",
" 'end': 234},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65922695,\n",
" 'index': 56,\n",
" 'word': ',',\n",
" 'start': 234,\n",
" 'end': 235},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56813127,\n",
" 'index': 57,\n",
" 'word': 'our',\n",
" 'start': 236,\n",
" 'end': 239},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58480746,\n",
" 'index': 58,\n",
" 'word': 'Viking',\n",
" 'start': 240,\n",
" 'end': 246},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6104148,\n",
" 'index': 59,\n",
" 'word': '1',\n",
" 'start': 247,\n",
" 'end': 248},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60260046,\n",
" 'index': 60,\n",
" 'word': 'spacecraft',\n",
" 'start': 249,\n",
" 'end': 259},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6534109,\n",
" 'index': 61,\n",
" 'word': 'was',\n",
" 'start': 260,\n",
" 'end': 263},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70453274,\n",
" 'index': 62,\n",
" 'word': 'circling',\n",
" 'start': 264,\n",
" 'end': 272},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59796387,\n",
" 'index': 63,\n",
" 'word': 'the',\n",
" 'start': 273,\n",
" 'end': 276},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57720464,\n",
" 'index': 64,\n",
" 'word': 'planet',\n",
" 'start': 277,\n",
" 'end': 283},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.81815565,\n",
" 'index': 65,\n",
" 'word': ',',\n",
" 'start': 283,\n",
" 'end': 284},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6159785,\n",
" 'index': 66,\n",
" 'word': 'snapping',\n",
" 'start': 285,\n",
" 'end': 293},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5178575,\n",
" 'index': 67,\n",
" 'word': 'photos',\n",
" 'start': 294,\n",
" 'end': 300},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7781755,\n",
" 'index': 68,\n",
" 'word': ',',\n",
" 'start': 300,\n",
" 'end': 301},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58284646,\n",
" 'index': 69,\n",
" 'word': 'when',\n",
" 'start': 302,\n",
" 'end': 306},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6206713,\n",
" 'index': 70,\n",
" 'word': 'it',\n",
" 'start': 307,\n",
" 'end': 309},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7120494,\n",
" 'index': 71,\n",
" 'word': 'spotted',\n",
" 'start': 310,\n",
" 'end': 317},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5560274,\n",
" 'index': 72,\n",
" 'word': 'the',\n",
" 'start': 318,\n",
" 'end': 321},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6203119,\n",
" 'index': 73,\n",
" 'word': 'shadowy',\n",
" 'start': 322,\n",
" 'end': 329},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5314929,\n",
" 'index': 74,\n",
" 'word': 'like',\n",
" 'start': 330,\n",
" 'end': 334},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5537888,\n",
" 'index': 75,\n",
" 'word': '##ness',\n",
" 'start': 334,\n",
" 'end': 338},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.605519,\n",
" 'index': 76,\n",
" 'word': 'of',\n",
" 'start': 339,\n",
" 'end': 341},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6460082,\n",
" 'index': 77,\n",
" 'word': 'a',\n",
" 'start': 342,\n",
" 'end': 343},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5739153,\n",
" 'index': 78,\n",
" 'word': 'human',\n",
" 'start': 344,\n",
" 'end': 349},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57964504,\n",
" 'index': 79,\n",
" 'word': 'face',\n",
" 'start': 350,\n",
" 'end': 354},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67440754,\n",
" 'index': 80,\n",
" 'word': '.',\n",
" 'start': 354,\n",
" 'end': 355},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5449105,\n",
" 'index': 81,\n",
" 'word': 'Us',\n",
" 'start': 356,\n",
" 'end': 358},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5455108,\n",
" 'index': 82,\n",
" 'word': 'scientists',\n",
" 'start': 359,\n",
" 'end': 369},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64505374,\n",
" 'index': 83,\n",
" 'word': 'figured',\n",
" 'start': 370,\n",
" 'end': 377},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5902839,\n",
" 'index': 84,\n",
" 'word': 'out',\n",
" 'start': 378,\n",
" 'end': 381},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65235263,\n",
" 'index': 85,\n",
" 'word': 'that',\n",
" 'start': 382,\n",
" 'end': 386},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6934572,\n",
" 'index': 86,\n",
" 'word': 'it',\n",
" 'start': 387,\n",
" 'end': 389},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66032475,\n",
" 'index': 87,\n",
" 'word': 'was',\n",
" 'start': 390,\n",
" 'end': 393},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57023394,\n",
" 'index': 88,\n",
" 'word': 'just',\n",
" 'start': 394,\n",
" 'end': 398},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.60775185,\n",
" 'index': 89,\n",
" 'word': 'another',\n",
" 'start': 399,\n",
" 'end': 406},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.70214206,\n",
" 'index': 90,\n",
" 'word': 'Martian',\n",
" 'start': 407,\n",
" 'end': 414},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6318522,\n",
" 'index': 91,\n",
" 'word': 'me',\n",
" 'start': 415,\n",
" 'end': 417},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6979068,\n",
" 'index': 92,\n",
" 'word': '##sa',\n",
" 'start': 417,\n",
" 'end': 419},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7611192,\n",
" 'index': 93,\n",
" 'word': ',',\n",
" 'start': 419,\n",
" 'end': 420},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72079974,\n",
" 'index': 94,\n",
" 'word': 'common',\n",
" 'start': 421,\n",
" 'end': 427},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6922783,\n",
" 'index': 95,\n",
" 'word': 'around',\n",
" 'start': 428,\n",
" 'end': 434},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5261554,\n",
" 'index': 96,\n",
" 'word': 'Cy',\n",
" 'start': 435,\n",
" 'end': 437},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5093499,\n",
" 'index': 97,\n",
" 'word': '##don',\n",
" 'start': 437,\n",
" 'end': 440},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5632856,\n",
" 'index': 98,\n",
" 'word': '##ia',\n",
" 'start': 440,\n",
" 'end': 442},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7464533,\n",
" 'index': 99,\n",
" 'word': ',',\n",
" 'start': 442,\n",
" 'end': 443},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.54192007,\n",
" 'index': 100,\n",
" 'word': 'only',\n",
" 'start': 444,\n",
" 'end': 448},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67456824,\n",
" 'index': 101,\n",
" 'word': 'this',\n",
" 'start': 449,\n",
" 'end': 453},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6420857,\n",
" 'index': 102,\n",
" 'word': 'one',\n",
" 'start': 454,\n",
" 'end': 457},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6792174,\n",
" 'index': 103,\n",
" 'word': 'had',\n",
" 'start': 458,\n",
" 'end': 461},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5339916,\n",
" 'index': 104,\n",
" 'word': 'shadows',\n",
" 'start': 462,\n",
" 'end': 469},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.643012,\n",
" 'index': 105,\n",
" 'word': 'that',\n",
" 'start': 470,\n",
" 'end': 474},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5314313,\n",
" 'index': 106,\n",
" 'word': 'made',\n",
" 'start': 475,\n",
" 'end': 479},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59911275,\n",
" 'index': 107,\n",
" 'word': 'it',\n",
" 'start': 480,\n",
" 'end': 482},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5045175,\n",
" 'index': 108,\n",
" 'word': 'look',\n",
" 'start': 483,\n",
" 'end': 487},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6535474,\n",
" 'index': 109,\n",
" 'word': 'like',\n",
" 'start': 488,\n",
" 'end': 492},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5992719,\n",
" 'index': 110,\n",
" 'word': 'an',\n",
" 'start': 493,\n",
" 'end': 495},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56163996,\n",
" 'index': 111,\n",
" 'word': 'Egypt',\n",
" 'start': 496,\n",
" 'end': 501},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.53756046,\n",
" 'index': 112,\n",
" 'word': '##ion',\n",
" 'start': 501,\n",
" 'end': 504},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.501027,\n",
" 'index': 113,\n",
" 'word': 'Ph',\n",
" 'start': 505,\n",
" 'end': 507},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6108546,\n",
" 'index': 114,\n",
" 'word': '##ara',\n",
" 'start': 507,\n",
" 'end': 510},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6155596,\n",
" 'index': 115,\n",
" 'word': '##oh',\n",
" 'start': 510,\n",
" 'end': 512},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6744254,\n",
" 'index': 116,\n",
" 'word': '.',\n",
" 'start': 512,\n",
" 'end': 513},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6499993,\n",
" 'index': 117,\n",
" 'word': 'Very',\n",
" 'start': 514,\n",
" 'end': 518},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64182097,\n",
" 'index': 118,\n",
" 'word': 'few',\n",
" 'start': 519,\n",
" 'end': 522},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.59322584,\n",
" 'index': 119,\n",
" 'word': 'days',\n",
" 'start': 523,\n",
" 'end': 527},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7502963,\n",
" 'index': 120,\n",
" 'word': 'later',\n",
" 'start': 528,\n",
" 'end': 533},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.74712145,\n",
" 'index': 121,\n",
" 'word': ',',\n",
" 'start': 533,\n",
" 'end': 534},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66655517,\n",
" 'index': 122,\n",
" 'word': 'we',\n",
" 'start': 535,\n",
" 'end': 537},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8063372,\n",
" 'index': 123,\n",
" 'word': 'revealed',\n",
" 'start': 538,\n",
" 'end': 546},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57793516,\n",
" 'index': 124,\n",
" 'word': 'the',\n",
" 'start': 547,\n",
" 'end': 550},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65657383,\n",
" 'index': 125,\n",
" 'word': 'image',\n",
" 'start': 551,\n",
" 'end': 556},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.54032975,\n",
" 'index': 126,\n",
" 'word': 'for',\n",
" 'start': 557,\n",
" 'end': 560},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6904917,\n",
" 'index': 127,\n",
" 'word': 'all',\n",
" 'start': 561,\n",
" 'end': 564},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5489662,\n",
" 'index': 128,\n",
" 'word': 'to',\n",
" 'start': 565,\n",
" 'end': 567},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5115776,\n",
" 'index': 129,\n",
" 'word': 'see',\n",
" 'start': 568,\n",
" 'end': 571},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8207864,\n",
" 'index': 130,\n",
" 'word': ',',\n",
" 'start': 571,\n",
" 'end': 572},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5277151,\n",
" 'index': 131,\n",
" 'word': 'and',\n",
" 'start': 573,\n",
" 'end': 576},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6339588,\n",
" 'index': 132,\n",
" 'word': 'we',\n",
" 'start': 577,\n",
" 'end': 579},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5440828,\n",
" 'index': 133,\n",
" 'word': 'made',\n",
" 'start': 580,\n",
" 'end': 584},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6284659,\n",
" 'index': 134,\n",
" 'word': 'sure',\n",
" 'start': 585,\n",
" 'end': 589},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5280459,\n",
" 'index': 135,\n",
" 'word': 'to',\n",
" 'start': 590,\n",
" 'end': 592},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6735471,\n",
" 'index': 136,\n",
" 'word': 'note',\n",
" 'start': 593,\n",
" 'end': 597},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6085519,\n",
" 'index': 137,\n",
" 'word': 'that',\n",
" 'start': 598,\n",
" 'end': 602},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69036937,\n",
" 'index': 138,\n",
" 'word': 'it',\n",
" 'start': 603,\n",
" 'end': 605},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7547675,\n",
" 'index': 139,\n",
" 'word': 'was',\n",
" 'start': 606,\n",
" 'end': 609},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7263249,\n",
" 'index': 140,\n",
" 'word': 'a',\n",
" 'start': 610,\n",
" 'end': 611},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7987393,\n",
" 'index': 141,\n",
" 'word': 'huge',\n",
" 'start': 612,\n",
" 'end': 616},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5952804,\n",
" 'index': 142,\n",
" 'word': 'rock',\n",
" 'start': 617,\n",
" 'end': 621},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56155205,\n",
" 'index': 143,\n",
" 'word': 'formation',\n",
" 'start': 622,\n",
" 'end': 631},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6473168,\n",
" 'index': 144,\n",
" 'word': 'that',\n",
" 'start': 632,\n",
" 'end': 636},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5825665,\n",
" 'index': 145,\n",
" 'word': 'just',\n",
" 'start': 637,\n",
" 'end': 641},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.53029037,\n",
" 'index': 146,\n",
" 'word': 'resembled',\n",
" 'start': 642,\n",
" 'end': 651},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6174586,\n",
" 'index': 147,\n",
" 'word': 'a',\n",
" 'start': 652,\n",
" 'end': 653},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5836265,\n",
" 'index': 148,\n",
" 'word': 'human',\n",
" 'start': 654,\n",
" 'end': 659},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5860484,\n",
" 'index': 149,\n",
" 'word': 'head',\n",
" 'start': 660,\n",
" 'end': 664},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51445687,\n",
" 'index': 150,\n",
" 'word': 'and',\n",
" 'start': 665,\n",
" 'end': 668},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5693341,\n",
" 'index': 151,\n",
" 'word': 'face',\n",
" 'start': 669,\n",
" 'end': 673},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.81818646,\n",
" 'index': 152,\n",
" 'word': ',',\n",
" 'start': 673,\n",
" 'end': 674},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6410124,\n",
" 'index': 153,\n",
" 'word': 'but',\n",
" 'start': 675,\n",
" 'end': 678},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6700791,\n",
" 'index': 154,\n",
" 'word': 'all',\n",
" 'start': 679,\n",
" 'end': 682},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5005887,\n",
" 'index': 155,\n",
" 'word': 'of',\n",
" 'start': 683,\n",
" 'end': 685},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71792203,\n",
" 'index': 156,\n",
" 'word': 'it',\n",
" 'start': 686,\n",
" 'end': 688},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7390527,\n",
" 'index': 157,\n",
" 'word': 'was',\n",
" 'start': 689,\n",
" 'end': 692},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7305893,\n",
" 'index': 158,\n",
" 'word': 'formed',\n",
" 'start': 693,\n",
" 'end': 699},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73789006,\n",
" 'index': 159,\n",
" 'word': 'by',\n",
" 'start': 700,\n",
" 'end': 702},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5128689,\n",
" 'index': 160,\n",
" 'word': 'shadows',\n",
" 'start': 703,\n",
" 'end': 710},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67443025,\n",
" 'index': 161,\n",
" 'word': '.',\n",
" 'start': 710,\n",
" 'end': 711},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7642682,\n",
" 'index': 162,\n",
" 'word': 'We',\n",
" 'start': 712,\n",
" 'end': 714},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.658385,\n",
" 'index': 163,\n",
" 'word': 'only',\n",
" 'start': 715,\n",
" 'end': 719},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.70881367,\n",
" 'index': 164,\n",
" 'word': 'announced',\n",
" 'start': 720,\n",
" 'end': 729},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7176985,\n",
" 'index': 165,\n",
" 'word': 'it',\n",
" 'start': 730,\n",
" 'end': 732},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6474057,\n",
" 'index': 166,\n",
" 'word': 'because',\n",
" 'start': 733,\n",
" 'end': 740},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6813441,\n",
" 'index': 167,\n",
" 'word': 'we',\n",
" 'start': 741,\n",
" 'end': 743},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5333977,\n",
" 'index': 168,\n",
" 'word': 'thought',\n",
" 'start': 744,\n",
" 'end': 751},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7404348,\n",
" 'index': 169,\n",
" 'word': 'it',\n",
" 'start': 752,\n",
" 'end': 754},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78600377,\n",
" 'index': 170,\n",
" 'word': 'would',\n",
" 'start': 755,\n",
" 'end': 760},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58613163,\n",
" 'index': 171,\n",
" 'word': 'be',\n",
" 'start': 761,\n",
" 'end': 763},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58903766,\n",
" 'index': 172,\n",
" 'word': 'a',\n",
" 'start': 764,\n",
" 'end': 765},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69468915,\n",
" 'index': 173,\n",
" 'word': 'good',\n",
" 'start': 766,\n",
" 'end': 770},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6279253,\n",
" 'index': 174,\n",
" 'word': 'way',\n",
" 'start': 771,\n",
" 'end': 774},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68305737,\n",
" 'index': 175,\n",
" 'word': 'to',\n",
" 'start': 775,\n",
" 'end': 777},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6546071,\n",
" 'index': 176,\n",
" 'word': 'engage',\n",
" 'start': 778,\n",
" 'end': 784},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66487354,\n",
" 'index': 177,\n",
" 'word': 'the',\n",
" 'start': 785,\n",
" 'end': 788},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.771617,\n",
" 'index': 178,\n",
" 'word': 'public',\n",
" 'start': 789,\n",
" 'end': 795},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5646253,\n",
" 'index': 179,\n",
" 'word': 'with',\n",
" 'start': 796,\n",
" 'end': 800},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.55737066,\n",
" 'index': 180,\n",
" 'word': 'NASA',\n",
" 'start': 801,\n",
" 'end': 805},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5705753,\n",
" 'index': 181,\n",
" 'word': \"'\",\n",
" 'start': 805,\n",
" 'end': 806},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59688944,\n",
" 'index': 182,\n",
" 'word': 's',\n",
" 'start': 806,\n",
" 'end': 807},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.563748,\n",
" 'index': 183,\n",
" 'word': 'findings',\n",
" 'start': 808,\n",
" 'end': 816},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.83522767,\n",
" 'index': 184,\n",
" 'word': ',',\n",
" 'start': 816,\n",
" 'end': 817},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.663783,\n",
" 'index': 185,\n",
" 'word': 'and',\n",
" 'start': 818,\n",
" 'end': 821},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56715816,\n",
" 'index': 186,\n",
" 'word': 'at',\n",
" 'start': 822,\n",
" 'end': 824},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6969365,\n",
" 'index': 187,\n",
" 'word': '##rra',\n",
" 'start': 824,\n",
" 'end': 827},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6686826,\n",
" 'index': 188,\n",
" 'word': '##ct',\n",
" 'start': 827,\n",
" 'end': 829},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6169446,\n",
" 'index': 189,\n",
" 'word': 'attention',\n",
" 'start': 830,\n",
" 'end': 839},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72824407,\n",
" 'index': 190,\n",
" 'word': 'to',\n",
" 'start': 840,\n",
" 'end': 842},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5518576,\n",
" 'index': 191,\n",
" 'word': 'Mars',\n",
" 'start': 843,\n",
" 'end': 847},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6182919,\n",
" 'index': 192,\n",
" 'word': '-',\n",
" 'start': 847,\n",
" 'end': 848},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60027426,\n",
" 'index': 193,\n",
" 'word': '-',\n",
" 'start': 848,\n",
" 'end': 849},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6326655,\n",
" 'index': 194,\n",
" 'word': 'and',\n",
" 'start': 850,\n",
" 'end': 853},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.62892854,\n",
" 'index': 195,\n",
" 'word': 'it',\n",
" 'start': 854,\n",
" 'end': 856},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59196657,\n",
" 'index': 196,\n",
" 'word': 'did',\n",
" 'start': 857,\n",
" 'end': 860},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67441356,\n",
" 'index': 197,\n",
" 'word': '.',\n",
" 'start': 860,\n",
" 'end': 861},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58260876,\n",
" 'index': 198,\n",
" 'word': 'The',\n",
" 'start': 863,\n",
" 'end': 866},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61155885,\n",
" 'index': 199,\n",
" 'word': 'face',\n",
" 'start': 867,\n",
" 'end': 871},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5333523,\n",
" 'index': 200,\n",
" 'word': 'on',\n",
" 'start': 872,\n",
" 'end': 874},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5905245,\n",
" 'index': 201,\n",
" 'word': 'Mars',\n",
" 'start': 875,\n",
" 'end': 879},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5726181,\n",
" 'index': 202,\n",
" 'word': 'soon',\n",
" 'start': 880,\n",
" 'end': 884},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57342,\n",
" 'index': 203,\n",
" 'word': 'became',\n",
" 'start': 885,\n",
" 'end': 891},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6202053,\n",
" 'index': 204,\n",
" 'word': 'a',\n",
" 'start': 892,\n",
" 'end': 893},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5770226,\n",
" 'index': 205,\n",
" 'word': 'pop',\n",
" 'start': 894,\n",
" 'end': 897},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67616236,\n",
" 'index': 206,\n",
" 'word': 'icon',\n",
" 'start': 898,\n",
" 'end': 902},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6696246,\n",
" 'index': 207,\n",
" 'word': ';',\n",
" 'start': 902,\n",
" 'end': 903},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6367805,\n",
" 'index': 208,\n",
" 'word': 'shot',\n",
" 'start': 904,\n",
" 'end': 908},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.54797834,\n",
" 'index': 209,\n",
" 'word': 'in',\n",
" 'start': 909,\n",
" 'end': 911},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6188789,\n",
" 'index': 210,\n",
" 'word': 'movies',\n",
" 'start': 912,\n",
" 'end': 918},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7146302,\n",
" 'index': 211,\n",
" 'word': ',',\n",
" 'start': 918,\n",
" 'end': 919},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5945637,\n",
" 'index': 212,\n",
" 'word': 'appeared',\n",
" 'start': 920,\n",
" 'end': 928},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6160087,\n",
" 'index': 213,\n",
" 'word': 'in',\n",
" 'start': 929,\n",
" 'end': 931},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66332275,\n",
" 'index': 214,\n",
" 'word': 'books',\n",
" 'start': 932,\n",
" 'end': 937},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7474604,\n",
" 'index': 215,\n",
" 'word': ',',\n",
" 'start': 937,\n",
" 'end': 938},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6651565,\n",
" 'index': 216,\n",
" 'word': 'magazines',\n",
" 'start': 939,\n",
" 'end': 948},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7416081,\n",
" 'index': 217,\n",
" 'word': ',',\n",
" 'start': 948,\n",
" 'end': 949},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6236832,\n",
" 'index': 218,\n",
" 'word': 'radio',\n",
" 'start': 950,\n",
" 'end': 955},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5897754,\n",
" 'index': 219,\n",
" 'word': 'talk',\n",
" 'start': 956,\n",
" 'end': 960},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58853656,\n",
" 'index': 220,\n",
" 'word': 'shows',\n",
" 'start': 961,\n",
" 'end': 966},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7553832,\n",
" 'index': 221,\n",
" 'word': ',',\n",
" 'start': 966,\n",
" 'end': 967},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.623144,\n",
" 'index': 222,\n",
" 'word': 'and',\n",
" 'start': 968,\n",
" 'end': 971},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.50205296,\n",
" 'index': 223,\n",
" 'word': 'haunted',\n",
" 'start': 972,\n",
" 'end': 979},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5905772,\n",
" 'index': 224,\n",
" 'word': 'grocery',\n",
" 'start': 980,\n",
" 'end': 987},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64581704,\n",
" 'index': 225,\n",
" 'word': 'store',\n",
" 'start': 988,\n",
" 'end': 993},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57227784,\n",
" 'index': 226,\n",
" 'word': 'check',\n",
" 'start': 994,\n",
" 'end': 999},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.62471133,\n",
" 'index': 227,\n",
" 'word': '##out',\n",
" 'start': 999,\n",
" 'end': 1002},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5380063,\n",
" 'index': 228,\n",
" 'word': 'lines',\n",
" 'start': 1003,\n",
" 'end': 1008},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7226769,\n",
" 'index': 229,\n",
" 'word': 'for',\n",
" 'start': 1009,\n",
" 'end': 1012},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6841871,\n",
" 'index': 230,\n",
" 'word': '25',\n",
" 'start': 1013,\n",
" 'end': 1015},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59963995,\n",
" 'index': 231,\n",
" 'word': 'years',\n",
" 'start': 1016,\n",
" 'end': 1021},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67440766,\n",
" 'index': 232,\n",
" 'word': '.',\n",
" 'start': 1021,\n",
" 'end': 1022},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51083964,\n",
" 'index': 233,\n",
" 'word': 'Some',\n",
" 'start': 1023,\n",
" 'end': 1027},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61685973,\n",
" 'index': 234,\n",
" 'word': 'people',\n",
" 'start': 1028,\n",
" 'end': 1034},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72570676,\n",
" 'index': 235,\n",
" 'word': 'thought',\n",
" 'start': 1035,\n",
" 'end': 1042},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.65166354,\n",
" 'index': 236,\n",
" 'word': 'the',\n",
" 'start': 1043,\n",
" 'end': 1046},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.526593,\n",
" 'index': 237,\n",
" 'word': 'natural',\n",
" 'start': 1047,\n",
" 'end': 1054},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5936935,\n",
" 'index': 238,\n",
" 'word': 'land',\n",
" 'start': 1055,\n",
" 'end': 1059},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.53759766,\n",
" 'index': 239,\n",
" 'word': '##form',\n",
" 'start': 1059,\n",
" 'end': 1063},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68580973,\n",
" 'index': 240,\n",
" 'word': 'was',\n",
" 'start': 1064,\n",
" 'end': 1067},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59179384,\n",
" 'index': 241,\n",
" 'word': 'evidence',\n",
" 'start': 1068,\n",
" 'end': 1076},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6524172,\n",
" 'index': 242,\n",
" 'word': 'of',\n",
" 'start': 1077,\n",
" 'end': 1079},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6464424,\n",
" 'index': 243,\n",
" 'word': 'life',\n",
" 'start': 1080,\n",
" 'end': 1084},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69582665,\n",
" 'index': 244,\n",
" 'word': 'on',\n",
" 'start': 1085,\n",
" 'end': 1087},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5751102,\n",
" 'index': 245,\n",
" 'word': 'Mars',\n",
" 'start': 1088,\n",
" 'end': 1092},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8460579,\n",
" 'index': 246,\n",
" 'word': ',',\n",
" 'start': 1092,\n",
" 'end': 1093},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6263853,\n",
" 'index': 247,\n",
" 'word': 'and',\n",
" 'start': 1094,\n",
" 'end': 1097},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6964252,\n",
" 'index': 248,\n",
" 'word': 'that',\n",
" 'start': 1098,\n",
" 'end': 1102},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5494476,\n",
" 'index': 249,\n",
" 'word': 'us',\n",
" 'start': 1103,\n",
" 'end': 1105},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56226,\n",
" 'index': 250,\n",
" 'word': 'scientists',\n",
" 'start': 1106,\n",
" 'end': 1116},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7735633,\n",
" 'index': 251,\n",
" 'word': 'wanted',\n",
" 'start': 1117,\n",
" 'end': 1123},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78176767,\n",
" 'index': 252,\n",
" 'word': 'to',\n",
" 'start': 1124,\n",
" 'end': 1126},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7204006,\n",
" 'index': 253,\n",
" 'word': 'hide',\n",
" 'start': 1127,\n",
" 'end': 1131},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.77582467,\n",
" 'index': 254,\n",
" 'word': 'it',\n",
" 'start': 1132,\n",
" 'end': 1134},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.81556284,\n",
" 'index': 255,\n",
" 'word': ',',\n",
" 'start': 1134,\n",
" 'end': 1135},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.73906016,\n",
" 'index': 256,\n",
" 'word': 'but',\n",
" 'start': 1136,\n",
" 'end': 1139},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.72601235,\n",
" 'index': 257,\n",
" 'word': 'really',\n",
" 'start': 1140,\n",
" 'end': 1146},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7567177,\n",
" 'index': 258,\n",
" 'word': ',',\n",
" 'start': 1146,\n",
" 'end': 1147},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6613144,\n",
" 'index': 259,\n",
" 'word': 'the',\n",
" 'start': 1148,\n",
" 'end': 1151},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5439451,\n",
" 'index': 260,\n",
" 'word': 'defenders',\n",
" 'start': 1152,\n",
" 'end': 1161},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52611476,\n",
" 'index': 261,\n",
" 'word': 'of',\n",
" 'start': 1162,\n",
" 'end': 1164},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61969113,\n",
" 'index': 262,\n",
" 'word': 'the',\n",
" 'start': 1165,\n",
" 'end': 1168},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5504615,\n",
" 'index': 263,\n",
" 'word': 'NASA',\n",
" 'start': 1169,\n",
" 'end': 1173},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6306784,\n",
" 'index': 264,\n",
" 'word': 'budget',\n",
" 'start': 1174,\n",
" 'end': 1180},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61308634,\n",
" 'index': 265,\n",
" 'word': 'wish',\n",
" 'start': 1181,\n",
" 'end': 1185},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68934727,\n",
" 'index': 266,\n",
" 'word': 'there',\n",
" 'start': 1186,\n",
" 'end': 1191},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7155088,\n",
" 'index': 267,\n",
" 'word': 'was',\n",
" 'start': 1192,\n",
" 'end': 1195},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57382756,\n",
" 'index': 268,\n",
" 'word': 'ancient',\n",
" 'start': 1196,\n",
" 'end': 1203},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56942445,\n",
" 'index': 269,\n",
" 'word': 'civilization',\n",
" 'start': 1204,\n",
" 'end': 1216},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6762141,\n",
" 'index': 270,\n",
" 'word': 'on',\n",
" 'start': 1217,\n",
" 'end': 1219},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5380082,\n",
" 'index': 271,\n",
" 'word': 'Mars',\n",
" 'start': 1220,\n",
" 'end': 1224},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67440796,\n",
" 'index': 272,\n",
" 'word': '.',\n",
" 'start': 1224,\n",
" 'end': 1225},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6982536,\n",
" 'index': 273,\n",
" 'word': 'We',\n",
" 'start': 1226,\n",
" 'end': 1228},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58452165,\n",
" 'index': 274,\n",
" 'word': 'decided',\n",
" 'start': 1229,\n",
" 'end': 1236},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6876171,\n",
" 'index': 275,\n",
" 'word': 'to',\n",
" 'start': 1237,\n",
" 'end': 1239},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5167381,\n",
" 'index': 276,\n",
" 'word': 'take',\n",
" 'start': 1240,\n",
" 'end': 1244},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5906027,\n",
" 'index': 277,\n",
" 'word': 'another',\n",
" 'start': 1245,\n",
" 'end': 1252},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.561436,\n",
" 'index': 278,\n",
" 'word': 'shot',\n",
" 'start': 1253,\n",
" 'end': 1257},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57617104,\n",
" 'index': 279,\n",
" 'word': 'just',\n",
" 'start': 1258,\n",
" 'end': 1262},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59515345,\n",
" 'index': 280,\n",
" 'word': 'to',\n",
" 'start': 1263,\n",
" 'end': 1265},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.585306,\n",
" 'index': 281,\n",
" 'word': 'make',\n",
" 'start': 1266,\n",
" 'end': 1270},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.55697155,\n",
" 'index': 282,\n",
" 'word': 'sure',\n",
" 'start': 1271,\n",
" 'end': 1275},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6123277,\n",
" 'index': 283,\n",
" 'word': 'we',\n",
" 'start': 1276,\n",
" 'end': 1278},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5167826,\n",
" 'index': 284,\n",
" 'word': 'weren',\n",
" 'start': 1279,\n",
" 'end': 1284},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.51131904,\n",
" 'index': 285,\n",
" 'word': \"'\",\n",
" 'start': 1284,\n",
" 'end': 1285},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5814942,\n",
" 'index': 286,\n",
" 'word': 't',\n",
" 'start': 1285,\n",
" 'end': 1286},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5043164,\n",
" 'index': 287,\n",
" 'word': 'wrong',\n",
" 'start': 1287,\n",
" 'end': 1292},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7838791,\n",
" 'index': 288,\n",
" 'word': ',',\n",
" 'start': 1292,\n",
" 'end': 1293},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6923021,\n",
" 'index': 289,\n",
" 'word': 'on',\n",
" 'start': 1294,\n",
" 'end': 1296},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5097389,\n",
" 'index': 290,\n",
" 'word': 'April',\n",
" 'start': 1297,\n",
" 'end': 1302},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5005761,\n",
" 'index': 291,\n",
" 'word': '5',\n",
" 'start': 1303,\n",
" 'end': 1304},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64288056,\n",
" 'index': 292,\n",
" 'word': ',',\n",
" 'start': 1304,\n",
" 'end': 1305},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.53319114,\n",
" 'index': 293,\n",
" 'word': '1998',\n",
" 'start': 1306,\n",
" 'end': 1310},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57352495,\n",
" 'index': 294,\n",
" 'word': '.',\n",
" 'start': 1310,\n",
" 'end': 1311},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6711837,\n",
" 'index': 295,\n",
" 'word': 'Michael',\n",
" 'start': 1312,\n",
" 'end': 1319},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5190688,\n",
" 'index': 296,\n",
" 'word': 'Mali',\n",
" 'start': 1320,\n",
" 'end': 1324},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.547607,\n",
" 'index': 297,\n",
" 'word': '##n',\n",
" 'start': 1324,\n",
" 'end': 1325},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5587775,\n",
" 'index': 298,\n",
" 'word': 'and',\n",
" 'start': 1326,\n",
" 'end': 1329},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.53947425,\n",
" 'index': 299,\n",
" 'word': 'his',\n",
" 'start': 1330,\n",
" 'end': 1333},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.57580537,\n",
" 'index': 300,\n",
" 'word': 'Mars',\n",
" 'start': 1334,\n",
" 'end': 1338},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.65286255,\n",
" 'index': 301,\n",
" 'word': 'Or',\n",
" 'start': 1339,\n",
" 'end': 1341},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6625232,\n",
" 'index': 302,\n",
" 'word': '##bit',\n",
" 'start': 1341,\n",
" 'end': 1344},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.58811414,\n",
" 'index': 303,\n",
" 'word': '##er',\n",
" 'start': 1344,\n",
" 'end': 1346},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56405336,\n",
" 'index': 304,\n",
" 'word': 'camera',\n",
" 'start': 1347,\n",
" 'end': 1353},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52700096,\n",
" 'index': 305,\n",
" 'word': 'team',\n",
" 'start': 1354,\n",
" 'end': 1358},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.53318214,\n",
" 'index': 306,\n",
" 'word': 'took',\n",
" 'start': 1359,\n",
" 'end': 1363},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.535714,\n",
" 'index': 307,\n",
" 'word': 'a',\n",
" 'start': 1364,\n",
" 'end': 1365},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.55766904,\n",
" 'index': 308,\n",
" 'word': 'picture',\n",
" 'start': 1366,\n",
" 'end': 1373},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.67556494,\n",
" 'index': 309,\n",
" 'word': 'that',\n",
" 'start': 1374,\n",
" 'end': 1378},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5773598,\n",
" 'index': 310,\n",
" 'word': 'was',\n",
" 'start': 1379,\n",
" 'end': 1382},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7880312,\n",
" 'index': 311,\n",
" 'word': 'ten',\n",
" 'start': 1383,\n",
" 'end': 1386},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6340814,\n",
" 'index': 312,\n",
" 'word': 'times',\n",
" 'start': 1387,\n",
" 'end': 1392},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.57962173,\n",
" 'index': 313,\n",
" 'word': 'sharp',\n",
" 'start': 1393,\n",
" 'end': 1398},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5738646,\n",
" 'index': 314,\n",
" 'word': '##er',\n",
" 'start': 1398,\n",
" 'end': 1400},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5188506,\n",
" 'index': 315,\n",
" 'word': 'than',\n",
" 'start': 1401,\n",
" 'end': 1405},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5666434,\n",
" 'index': 316,\n",
" 'word': 'the',\n",
" 'start': 1406,\n",
" 'end': 1409},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56921905,\n",
" 'index': 317,\n",
" 'word': 'original',\n",
" 'start': 1410,\n",
" 'end': 1418},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6608325,\n",
" 'index': 318,\n",
" 'word': 'Viking',\n",
" 'start': 1419,\n",
" 'end': 1425},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5899687,\n",
" 'index': 319,\n",
" 'word': 'photos',\n",
" 'start': 1426,\n",
" 'end': 1432},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.83449113,\n",
" 'index': 320,\n",
" 'word': ',',\n",
" 'start': 1432,\n",
" 'end': 1433},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.80289483,\n",
" 'index': 321,\n",
" 'word': 'revealing',\n",
" 'start': 1434,\n",
" 'end': 1443},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.67294866,\n",
" 'index': 322,\n",
" 'word': 'a',\n",
" 'start': 1444,\n",
" 'end': 1445},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5613308,\n",
" 'index': 323,\n",
" 'word': 'natural',\n",
" 'start': 1446,\n",
" 'end': 1453},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.505271,\n",
" 'index': 324,\n",
" 'word': 'land',\n",
" 'start': 1454,\n",
" 'end': 1458},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6070904,\n",
" 'index': 325,\n",
" 'word': '##form',\n",
" 'start': 1458,\n",
" 'end': 1462},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7992068,\n",
" 'index': 326,\n",
" 'word': ',',\n",
" 'start': 1462,\n",
" 'end': 1463},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6349267,\n",
" 'index': 327,\n",
" 'word': 'which',\n",
" 'start': 1464,\n",
" 'end': 1469},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6860331,\n",
" 'index': 328,\n",
" 'word': 'meant',\n",
" 'start': 1470,\n",
" 'end': 1475},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.540704,\n",
" 'index': 329,\n",
" 'word': 'no',\n",
" 'start': 1476,\n",
" 'end': 1478},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.62928075,\n",
" 'index': 330,\n",
" 'word': 'alien',\n",
" 'start': 1479,\n",
" 'end': 1484},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5880116,\n",
" 'index': 331,\n",
" 'word': 'monument',\n",
" 'start': 1485,\n",
" 'end': 1493},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6744335,\n",
" 'index': 332,\n",
" 'word': '.',\n",
" 'start': 1493,\n",
" 'end': 1494},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56081796,\n",
" 'index': 333,\n",
" 'word': '\"',\n",
" 'start': 1495,\n",
" 'end': 1496},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51550716,\n",
" 'index': 334,\n",
" 'word': 'But',\n",
" 'start': 1496,\n",
" 'end': 1499},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69971544,\n",
" 'index': 335,\n",
" 'word': 'that',\n",
" 'start': 1500,\n",
" 'end': 1504},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.59912956,\n",
" 'index': 336,\n",
" 'word': 'picture',\n",
" 'start': 1505,\n",
" 'end': 1512},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6235601,\n",
" 'index': 337,\n",
" 'word': 'wasn',\n",
" 'start': 1513,\n",
" 'end': 1517},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5095923,\n",
" 'index': 338,\n",
" 'word': \"'\",\n",
" 'start': 1517,\n",
" 'end': 1518},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6090436,\n",
" 'index': 339,\n",
" 'word': 't',\n",
" 'start': 1518,\n",
" 'end': 1519},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.78969496,\n",
" 'index': 340,\n",
" 'word': 'very',\n",
" 'start': 1520,\n",
" 'end': 1524},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7117616,\n",
" 'index': 341,\n",
" 'word': 'clear',\n",
" 'start': 1525,\n",
" 'end': 1530},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6051015,\n",
" 'index': 342,\n",
" 'word': 'at',\n",
" 'start': 1531,\n",
" 'end': 1533},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5591377,\n",
" 'index': 343,\n",
" 'word': 'all',\n",
" 'start': 1534,\n",
" 'end': 1537},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8091911,\n",
" 'index': 344,\n",
" 'word': ',',\n",
" 'start': 1537,\n",
" 'end': 1538},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6422674,\n",
" 'index': 345,\n",
" 'word': 'which',\n",
" 'start': 1539,\n",
" 'end': 1544},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60172576,\n",
" 'index': 346,\n",
" 'word': 'could',\n",
" 'start': 1545,\n",
" 'end': 1550},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60047704,\n",
" 'index': 347,\n",
" 'word': 'mean',\n",
" 'start': 1551,\n",
" 'end': 1555},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.56904197,\n",
" 'index': 348,\n",
" 'word': 'alien',\n",
" 'start': 1556,\n",
" 'end': 1561},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.62595856,\n",
" 'index': 349,\n",
" 'word': 'markings',\n",
" 'start': 1562,\n",
" 'end': 1570},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6299875,\n",
" 'index': 350,\n",
" 'word': 'were',\n",
" 'start': 1571,\n",
" 'end': 1575},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6949092,\n",
" 'index': 351,\n",
" 'word': 'hidden',\n",
" 'start': 1576,\n",
" 'end': 1582},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6191803,\n",
" 'index': 352,\n",
" 'word': 'by',\n",
" 'start': 1583,\n",
" 'end': 1585},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5347818,\n",
" 'index': 353,\n",
" 'word': 'haze',\n",
" 'start': 1586,\n",
" 'end': 1590},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5951616,\n",
" 'index': 354,\n",
" 'word': '\"',\n",
" 'start': 1590,\n",
" 'end': 1591},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5990277,\n",
" 'index': 355,\n",
" 'word': 'Well',\n",
" 'start': 1592,\n",
" 'end': 1596},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68367994,\n",
" 'index': 356,\n",
" 'word': 'no',\n",
" 'start': 1597,\n",
" 'end': 1599},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.780734,\n",
" 'index': 357,\n",
" 'word': ',',\n",
" 'start': 1599,\n",
" 'end': 1600},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51023954,\n",
" 'index': 358,\n",
" 'word': 'yes',\n",
" 'start': 1601,\n",
" 'end': 1604},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6934846,\n",
" 'index': 359,\n",
" 'word': 'that',\n",
" 'start': 1605,\n",
" 'end': 1609},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.62337947,\n",
" 'index': 360,\n",
" 'word': 'rumor',\n",
" 'start': 1610,\n",
" 'end': 1615},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.69536877,\n",
" 'index': 361,\n",
" 'word': 'started',\n",
" 'start': 1616,\n",
" 'end': 1623},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.8160849,\n",
" 'index': 362,\n",
" 'word': ',',\n",
" 'start': 1623,\n",
" 'end': 1624},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5922601,\n",
" 'index': 363,\n",
" 'word': 'but',\n",
" 'start': 1625,\n",
" 'end': 1628},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64855474,\n",
" 'index': 364,\n",
" 'word': 'to',\n",
" 'start': 1629,\n",
" 'end': 1631},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5228099,\n",
" 'index': 365,\n",
" 'word': 'prove',\n",
" 'start': 1632,\n",
" 'end': 1637},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6812592,\n",
" 'index': 366,\n",
" 'word': 'them',\n",
" 'start': 1638,\n",
" 'end': 1642},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64243567,\n",
" 'index': 367,\n",
" 'word': 'wrong',\n",
" 'start': 1643,\n",
" 'end': 1648},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6727351,\n",
" 'index': 368,\n",
" 'word': 'on',\n",
" 'start': 1649,\n",
" 'end': 1651},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6589173,\n",
" 'index': 369,\n",
" 'word': 'April',\n",
" 'start': 1652,\n",
" 'end': 1657},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6754752,\n",
" 'index': 370,\n",
" 'word': '8',\n",
" 'start': 1658,\n",
" 'end': 1659},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.60759,\n",
" 'index': 371,\n",
" 'word': ',',\n",
" 'start': 1659,\n",
" 'end': 1660},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6044962,\n",
" 'index': 372,\n",
" 'word': '2001',\n",
" 'start': 1661,\n",
" 'end': 1665},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6850097,\n",
" 'index': 373,\n",
" 'word': 'we',\n",
" 'start': 1666,\n",
" 'end': 1668},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5219355,\n",
" 'index': 374,\n",
" 'word': 'decided',\n",
" 'start': 1669,\n",
" 'end': 1676},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6707725,\n",
" 'index': 375,\n",
" 'word': 'to',\n",
" 'start': 1677,\n",
" 'end': 1679},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.51351875,\n",
" 'index': 376,\n",
" 'word': 'take',\n",
" 'start': 1680,\n",
" 'end': 1684},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5230102,\n",
" 'index': 377,\n",
" 'word': 'another',\n",
" 'start': 1685,\n",
" 'end': 1692},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5453028,\n",
" 'index': 378,\n",
" 'word': 'picture',\n",
" 'start': 1693,\n",
" 'end': 1700},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7876373,\n",
" 'index': 379,\n",
" 'word': ',',\n",
" 'start': 1700,\n",
" 'end': 1701},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5465298,\n",
" 'index': 380,\n",
" 'word': 'making',\n",
" 'start': 1702,\n",
" 'end': 1708},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.63919556,\n",
" 'index': 381,\n",
" 'word': 'sure',\n",
" 'start': 1709,\n",
" 'end': 1713},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5663544,\n",
" 'index': 382,\n",
" 'word': 'it',\n",
" 'start': 1714,\n",
" 'end': 1716},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.604365,\n",
" 'index': 383,\n",
" 'word': 'was',\n",
" 'start': 1717,\n",
" 'end': 1720},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.66089803,\n",
" 'index': 384,\n",
" 'word': 'a',\n",
" 'start': 1721,\n",
" 'end': 1722},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6593189,\n",
" 'index': 385,\n",
" 'word': 'cloud',\n",
" 'start': 1723,\n",
" 'end': 1728},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58361953,\n",
" 'index': 386,\n",
" 'word': '##less',\n",
" 'start': 1728,\n",
" 'end': 1732},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7110155,\n",
" 'index': 387,\n",
" 'word': 'summer',\n",
" 'start': 1733,\n",
" 'end': 1739},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5355839,\n",
" 'index': 388,\n",
" 'word': 'day',\n",
" 'start': 1740,\n",
" 'end': 1743},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6743491,\n",
" 'index': 389,\n",
" 'word': '.',\n",
" 'start': 1743,\n",
" 'end': 1744},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5186097,\n",
" 'index': 390,\n",
" 'word': 'Mali',\n",
" 'start': 1745,\n",
" 'end': 1749},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6076603,\n",
" 'index': 391,\n",
" 'word': '##n',\n",
" 'start': 1749,\n",
" 'end': 1750},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5730305,\n",
" 'index': 392,\n",
" 'word': \"'\",\n",
" 'start': 1750,\n",
" 'end': 1751},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.55874217,\n",
" 'index': 393,\n",
" 'word': 's',\n",
" 'start': 1751,\n",
" 'end': 1752},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5618185,\n",
" 'index': 394,\n",
" 'word': 'team',\n",
" 'start': 1753,\n",
" 'end': 1757},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5198947,\n",
" 'index': 395,\n",
" 'word': 'captured',\n",
" 'start': 1758,\n",
" 'end': 1766},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.53001165,\n",
" 'index': 396,\n",
" 'word': 'an',\n",
" 'start': 1767,\n",
" 'end': 1769},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6115317,\n",
" 'index': 397,\n",
" 'word': 'amazing',\n",
" 'start': 1770,\n",
" 'end': 1777},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61094236,\n",
" 'index': 398,\n",
" 'word': 'photo',\n",
" 'start': 1778,\n",
" 'end': 1783},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6055951,\n",
" 'index': 399,\n",
" 'word': 'using',\n",
" 'start': 1784,\n",
" 'end': 1789},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6140764,\n",
" 'index': 400,\n",
" 'word': 'the',\n",
" 'start': 1790,\n",
" 'end': 1793},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6442308,\n",
" 'index': 401,\n",
" 'word': 'camera',\n",
" 'start': 1794,\n",
" 'end': 1800},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.63841516,\n",
" 'index': 402,\n",
" 'word': \"'\",\n",
" 'start': 1800,\n",
" 'end': 1801},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5404359,\n",
" 'index': 403,\n",
" 'word': 's',\n",
" 'start': 1801,\n",
" 'end': 1802},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5500208,\n",
" 'index': 404,\n",
" 'word': 'absolute',\n",
" 'start': 1803,\n",
" 'end': 1811},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52286357,\n",
" 'index': 405,\n",
" 'word': 'maximum',\n",
" 'start': 1812,\n",
" 'end': 1819},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.51756054,\n",
" 'index': 406,\n",
" 'word': 'revolution',\n",
" 'start': 1820,\n",
" 'end': 1830},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.6743314,\n",
" 'index': 407,\n",
" 'word': '.',\n",
" 'start': 1830,\n",
" 'end': 1831},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5196729,\n",
" 'index': 408,\n",
" 'word': 'With',\n",
" 'start': 1832,\n",
" 'end': 1836},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.542713,\n",
" 'index': 409,\n",
" 'word': 'this',\n",
" 'start': 1837,\n",
" 'end': 1841},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61549795,\n",
" 'index': 410,\n",
" 'word': 'camera',\n",
" 'start': 1842,\n",
" 'end': 1848},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6854319,\n",
" 'index': 411,\n",
" 'word': 'you',\n",
" 'start': 1849,\n",
" 'end': 1852},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.54689115,\n",
" 'index': 412,\n",
" 'word': 'can',\n",
" 'start': 1853,\n",
" 'end': 1856},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51496017,\n",
" 'index': 413,\n",
" 'word': 'disc',\n",
" 'start': 1857,\n",
" 'end': 1861},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.51617885,\n",
" 'index': 414,\n",
" 'word': '##ern',\n",
" 'start': 1861,\n",
" 'end': 1864},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5568011,\n",
" 'index': 415,\n",
" 'word': 'things',\n",
" 'start': 1865,\n",
" 'end': 1871},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.62646383,\n",
" 'index': 416,\n",
" 'word': 'in',\n",
" 'start': 1872,\n",
" 'end': 1874},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5608486,\n",
" 'index': 417,\n",
" 'word': 'a',\n",
" 'start': 1875,\n",
" 'end': 1876},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.50235623,\n",
" 'index': 418,\n",
" 'word': 'digital',\n",
" 'start': 1877,\n",
" 'end': 1884},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5582305,\n",
" 'index': 419,\n",
" 'word': 'image',\n",
" 'start': 1885,\n",
" 'end': 1890},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7322742,\n",
" 'index': 420,\n",
" 'word': ',',\n",
" 'start': 1890,\n",
" 'end': 1891},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6216431,\n",
" 'index': 421,\n",
" 'word': '3',\n",
" 'start': 1892,\n",
" 'end': 1893},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6138383,\n",
" 'index': 422,\n",
" 'word': 'times',\n",
" 'start': 1894,\n",
" 'end': 1899},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.61178154,\n",
" 'index': 423,\n",
" 'word': 'bigger',\n",
" 'start': 1900,\n",
" 'end': 1906},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51271856,\n",
" 'index': 424,\n",
" 'word': 'than',\n",
" 'start': 1907,\n",
" 'end': 1911},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5323224,\n",
" 'index': 425,\n",
" 'word': 'the',\n",
" 'start': 1912,\n",
" 'end': 1915},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5431272,\n",
" 'index': 426,\n",
" 'word': 'p',\n",
" 'start': 1916,\n",
" 'end': 1917},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.50044477,\n",
" 'index': 427,\n",
" 'word': '##ixel',\n",
" 'start': 1917,\n",
" 'end': 1921},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.55557716,\n",
" 'index': 428,\n",
" 'word': 'size',\n",
" 'start': 1922,\n",
" 'end': 1926},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.616753,\n",
" 'index': 429,\n",
" 'word': 'which',\n",
" 'start': 1927,\n",
" 'end': 1932},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.71363014,\n",
" 'index': 430,\n",
" 'word': 'means',\n",
" 'start': 1933,\n",
" 'end': 1938},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5364858,\n",
" 'index': 431,\n",
" 'word': 'if',\n",
" 'start': 1939,\n",
" 'end': 1941},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5746519,\n",
" 'index': 432,\n",
" 'word': 'there',\n",
" 'start': 1942,\n",
" 'end': 1947},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5798605,\n",
" 'index': 433,\n",
" 'word': 'were',\n",
" 'start': 1948,\n",
" 'end': 1952},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.7288978,\n",
" 'index': 434,\n",
" 'word': 'any',\n",
" 'start': 1953,\n",
" 'end': 1956},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.55390185,\n",
" 'index': 435,\n",
" 'word': 'signs',\n",
" 'start': 1957,\n",
" 'end': 1962},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6790573,\n",
" 'index': 436,\n",
" 'word': 'of',\n",
" 'start': 1963,\n",
" 'end': 1965},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6747198,\n",
" 'index': 437,\n",
" 'word': 'life',\n",
" 'start': 1966,\n",
" 'end': 1970},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.76907825,\n",
" 'index': 438,\n",
" 'word': ',',\n",
" 'start': 1970,\n",
" 'end': 1971},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68350637,\n",
" 'index': 439,\n",
" 'word': 'you',\n",
" 'start': 1972,\n",
" 'end': 1975},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.50333244,\n",
" 'index': 440,\n",
" 'word': 'could',\n",
" 'start': 1976,\n",
" 'end': 1981},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.51758593,\n",
" 'index': 441,\n",
" 'word': 'easily',\n",
" 'start': 1982,\n",
" 'end': 1988},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6057637,\n",
" 'index': 442,\n",
" 'word': 'see',\n",
" 'start': 1989,\n",
" 'end': 1992},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52759,\n",
" 'index': 443,\n",
" 'word': 'what',\n",
" 'start': 1993,\n",
" 'end': 1997},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.53059214,\n",
" 'index': 444,\n",
" 'word': 'they',\n",
" 'start': 1998,\n",
" 'end': 2002},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6364522,\n",
" 'index': 445,\n",
" 'word': 'were',\n",
" 'start': 2003,\n",
" 'end': 2007},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67442816,\n",
" 'index': 446,\n",
" 'word': '.',\n",
" 'start': 2007,\n",
" 'end': 2008},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5476038,\n",
" 'index': 447,\n",
" 'word': 'What',\n",
" 'start': 2009,\n",
" 'end': 2013},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5251065,\n",
" 'index': 448,\n",
" 'word': 'the',\n",
" 'start': 2014,\n",
" 'end': 2017},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.52539897,\n",
" 'index': 449,\n",
" 'word': 'picture',\n",
" 'start': 2018,\n",
" 'end': 2025},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7622849,\n",
" 'index': 450,\n",
" 'word': 'showed',\n",
" 'start': 2026,\n",
" 'end': 2032},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7453803,\n",
" 'index': 451,\n",
" 'word': 'was',\n",
" 'start': 2033,\n",
" 'end': 2036},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6480745,\n",
" 'index': 452,\n",
" 'word': 'the',\n",
" 'start': 2037,\n",
" 'end': 2040},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5418471,\n",
" 'index': 453,\n",
" 'word': 'butt',\n",
" 'start': 2041,\n",
" 'end': 2045},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5352597,\n",
" 'index': 454,\n",
" 'word': '##e',\n",
" 'start': 2045,\n",
" 'end': 2046},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.64424896,\n",
" 'index': 455,\n",
" 'word': 'or',\n",
" 'start': 2047,\n",
" 'end': 2049},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7269686,\n",
" 'index': 456,\n",
" 'word': 'me',\n",
" 'start': 2050,\n",
" 'end': 2052},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68219054,\n",
" 'index': 457,\n",
" 'word': '##sa',\n",
" 'start': 2052,\n",
" 'end': 2054},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.7789693,\n",
" 'index': 458,\n",
" 'word': ',',\n",
" 'start': 2054,\n",
" 'end': 2055},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.571613,\n",
" 'index': 459,\n",
" 'word': 'which',\n",
" 'start': 2056,\n",
" 'end': 2061},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5934434,\n",
" 'index': 460,\n",
" 'word': 'are',\n",
" 'start': 2062,\n",
" 'end': 2065},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58099824,\n",
" 'index': 461,\n",
" 'word': 'land',\n",
" 'start': 2066,\n",
" 'end': 2070},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5988671,\n",
" 'index': 462,\n",
" 'word': '##form',\n",
" 'start': 2070,\n",
" 'end': 2074},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.58407044,\n",
" 'index': 463,\n",
" 'word': '##s',\n",
" 'start': 2074,\n",
" 'end': 2075},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.68731594,\n",
" 'index': 464,\n",
" 'word': 'common',\n",
" 'start': 2076,\n",
" 'end': 2082},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.6056752,\n",
" 'index': 465,\n",
" 'word': 'around',\n",
" 'start': 2083,\n",
" 'end': 2089},\n",
" {'entity': 'LABEL_1',\n",
" 'score': 0.5920002,\n",
" 'index': 466,\n",
" 'word': 'the',\n",
" 'start': 2090,\n",
" 'end': 2093},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.53270173,\n",
" 'index': 467,\n",
" 'word': 'American',\n",
" 'start': 2094,\n",
" 'end': 2102},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.5303487,\n",
" 'index': 468,\n",
" 'word': 'West',\n",
" 'start': 2103,\n",
" 'end': 2107},\n",
" {'entity': 'LABEL_0',\n",
" 'score': 0.67443454,\n",
" 'index': 469,\n",
" 'word': '.',\n",
" 'start': 2107,\n",
" 'end': 2108}]"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"\n",
"# Run a token-classification (\"ner\") pipeline over `text` with a BERT checkpoint.\n",
"#\n",
"# NOTE(review): the output recorded for this cell contains only the generic tags\n",
"# LABEL_0 / LABEL_1 with scores of roughly 0.50-0.78. That pattern suggests the\n",
"# token-classification head of this checkpoint is not fine-tuned for NER - TODO confirm\n",
"# before interpreting these tags as entities.\n",
"from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n",
"\n",
"# Name the checkpoint once so the tokenizer and the model can never drift apart.\n",
"BERT_WWM_CHECKPOINT = \"google-bert/bert-large-cased-whole-word-masking\"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(BERT_WWM_CHECKPOINT)\n",
"model = AutoModelForTokenClassification.from_pretrained(BERT_WWM_CHECKPOINT)\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"# `text` is the essay string assigned in an earlier cell of this notebook.\n",
"# Each result is a dict with the keys entity, score, index, word, start and end.\n",
"ner_results = nlp(text)\n",
"ner_results"
]
},
{
"cell_type": "markdown",
"id": "33fb7228-f9d1-49a1-83a2-3051a9a490cf",
"metadata": {},
"source": [
"## 54 PlanTL-GOB-ES/es_anonimization_core_lg"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "4bff7320-9f0f-4bd8-bf10-902f1c0c9100",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
},
{
"ename": "OSError",
"evalue": "PlanTL-GOB-ES/es_anonimization_core_lg does not appear to have a file named config.json. Checkout 'https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/tree/main' for available files.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 304\u001b[0m response\u001b[38;5;241m.\u001b[39mraise_for_status()\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\requests\\models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
"\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/resolve/main/config.json",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:399\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 398\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[1;32m--> 399\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[0;32m 400\u001b[0m path_or_repo_id,\n\u001b[0;32m 401\u001b[0m filename,\n\u001b[0;32m 402\u001b[0m subfolder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(subfolder) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m subfolder,\n\u001b[0;32m 403\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 404\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 405\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 406\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 407\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 408\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 409\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 410\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 411\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 412\u001b[0m )\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1221\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[1;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[0;32m 1220\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_cache_dir(\n\u001b[0;32m 1222\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[0;32m 1223\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 1224\u001b[0m \u001b[38;5;66;03m# File info\u001b[39;00m\n\u001b[0;32m 1225\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1226\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1227\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1228\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1229\u001b[0m \u001b[38;5;66;03m# HTTP info\u001b[39;00m\n\u001b[0;32m 1230\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1231\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1232\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1233\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1234\u001b[0m \u001b[38;5;66;03m# Additional options\u001b[39;00m\n\u001b[0;32m 1235\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1236\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 1237\u001b[0m )\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1282\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[1;34m(cache_dir, repo_id, filename, repo_type, revision, headers, proxies, etag_timeout, endpoint, local_files_only, force_download)\u001b[0m\n\u001b[0;32m 1280\u001b[0m \u001b[38;5;66;03m# Try to get metadata (etag, commit_hash, url, size) from the server.\u001b[39;00m\n\u001b[0;32m 1281\u001b[0m \u001b[38;5;66;03m# If we can't, a HEAD request error is returned.\u001b[39;00m\n\u001b[1;32m-> 1282\u001b[0m (url_to_download, etag, commit_hash, expected_size, head_call_error) \u001b[38;5;241m=\u001b[39m _get_metadata_or_catch_error(\n\u001b[0;32m 1283\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1284\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1285\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1286\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1287\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1288\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1289\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1290\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1291\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1292\u001b[0m storage_folder\u001b[38;5;241m=\u001b[39mstorage_folder,\n\u001b[0;32m 1293\u001b[0m relative_filename\u001b[38;5;241m=\u001b[39mrelative_filename,\n\u001b[0;32m 1294\u001b[0m )\n\u001b[0;32m 1296\u001b[0m \u001b[38;5;66;03m# etag can be None for several reasons:\u001b[39;00m\n\u001b[0;32m 1297\u001b[0m \u001b[38;5;66;03m# 1. we passed local_files_only.\u001b[39;00m\n\u001b[0;32m 1298\u001b[0m \u001b[38;5;66;03m# 2. 
we don't have a connection\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[38;5;66;03m# If the specified revision is a commit hash, look inside \"snapshots\".\u001b[39;00m\n\u001b[0;32m 1305\u001b[0m \u001b[38;5;66;03m# If the specified revision is a branch or tag, look inside \"refs\".\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1722\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[1;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[0;32m 1721\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1722\u001b[0m metadata \u001b[38;5;241m=\u001b[39m get_hf_file_metadata(url\u001b[38;5;241m=\u001b[39murl, proxies\u001b[38;5;241m=\u001b[39mproxies, timeout\u001b[38;5;241m=\u001b[39metag_timeout, headers\u001b[38;5;241m=\u001b[39mheaders)\n\u001b[0;32m 1723\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1645\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[1;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[0;32m 1644\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[1;32m-> 1645\u001b[0m r \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 1646\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHEAD\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 1647\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 1648\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1649\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 1650\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 1651\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1652\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 1653\u001b[0m )\n\u001b[0;32m 1654\u001b[0m hf_raise_for_status(r)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:372\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[1;32m--> 372\u001b[0m response \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 373\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[0;32m 374\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 375\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 376\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams,\n\u001b[0;32m 377\u001b[0m )\n\u001b[0;32m 379\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[0;32m 380\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:396\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 395\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[1;32m--> 396\u001b[0m hf_raise_for_status(response)\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:315\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 314\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 315\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EntryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"\u001b[1;31mEntryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-6689e097-39a33c1619221c92312ec259;76a7f0ec-2b46-42ee-9607-ec1424ceed75)\n\nEntry Not Found for url: https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/resolve/main/config.json.",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[114], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlanTL-GOB-ES/es_anonimization_core_lg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlanTL-GOB-ES/es_anonimization_core_lg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:934\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 931\u001b[0m trust_remote_code \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrust_remote_code\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 932\u001b[0m code_revision \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcode_revision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m--> 934\u001b[0m config_dict, unused_kwargs \u001b[38;5;241m=\u001b[39m PretrainedConfig\u001b[38;5;241m.\u001b[39mget_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 935\u001b[0m has_remote_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoConfig\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 936\u001b[0m has_local_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;129;01min\u001b[39;00m CONFIG_MAPPING\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:632\u001b[0m, in \u001b[0;36mPretrainedConfig.get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 630\u001b[0m original_kwargs \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(kwargs)\n\u001b[0;32m 631\u001b[0m \u001b[38;5;66;03m# Get config dict associated with the base config file\u001b[39;00m\n\u001b[1;32m--> 632\u001b[0m config_dict, kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict:\n\u001b[0;32m 634\u001b[0m original_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:689\u001b[0m, in \u001b[0;36mPretrainedConfig._get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 685\u001b[0m configuration_file \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_configuration_file\u001b[39m\u001b[38;5;124m\"\u001b[39m, CONFIG_NAME) \u001b[38;5;28;01mif\u001b[39;00m gguf_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m gguf_file\n\u001b[0;32m 687\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 688\u001b[0m \u001b[38;5;66;03m# Load from local folder or from cache or download from model Hub and cache\u001b[39;00m\n\u001b[1;32m--> 689\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m cached_file(\n\u001b[0;32m 690\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 691\u001b[0m configuration_file,\n\u001b[0;32m 692\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 693\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 694\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 695\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 696\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 697\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 698\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 699\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 700\u001b[0m subfolder\u001b[38;5;241m=\u001b[39msubfolder,\n\u001b[0;32m 701\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 702\u001b[0m )\n\u001b[0;32m 703\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[0;32m 704\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m 
\u001b[38;5;167;01mEnvironmentError\u001b[39;00m:\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to\u001b[39;00m\n\u001b[0;32m 706\u001b[0m \u001b[38;5;66;03m# the original exception.\u001b[39;00m\n",
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:453\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m revision \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 452\u001b[0m revision \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 453\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not appear to have a file named \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_filename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Checkout \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/tree/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available files.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 456\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 458\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n",
"\u001b[1;31mOSError\u001b[0m: PlanTL-GOB-ES/es_anonimization_core_lg does not appear to have a file named config.json. Checkout 'https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/tree/main' for available files."
]
}
],
"source": [
"\n",
"\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
"from transformers import pipeline\n",
"tokenizer = AutoTokenizer.from_pretrained(\"PlanTL-GOB-ES/es_anonimization_core_lg\")\n",
"model = AutoModelForTokenClassification.from_pretrained(\"PlanTL-GOB-ES/es_anonimization_core_lg\")\n",
"nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
"\n",
"ner_results = nlp(text)\n",
"ner_results\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c51532f0-b5c5-43ad-a5a4-936a4876fd9f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}