{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "ced6196b-109c-489a-ab48-e65829ac6796", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer, RobertaForTokenClassification\n", "import torch\n", "from transformers import AutoTokenizer\n", "from json import JSONEncoder\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification" ] }, { "cell_type": "code", "execution_count": 4, "id": "89c25949-7688-45f5-be0b-5bfe88bdbc05", "metadata": {}, "outputs": [], "source": [ "text=\"\"\"So, if you're a NASA scientist, you should be able to tell me the whole story about the Face On Mars, which obviously is evidence that there is life on Mars, and that the face was created by aliens, correct?\" No, twenty five years ago, our Viking 1 spacecraft was circling the planet, snapping photos, when it spotted the shadowy likeness of a human face. Us scientists figured out that it was just another Martian mesa, common around Cydonia, only this one had shadows that made it look like an Egyption Pharaoh. Very few days later, we revealed the image for all to see, and we made sure to note that it was a huge rock formation that just resembled a human head and face, but all of it was formed by shadows. We only announced it because we thought it would be a good way to engage the public with NASA's findings, and atrract attention to Mars-- and it did.\n", "\n", "The face on Mars soon became a pop icon; shot in movies, appeared in books, magazines, radio talk shows, and haunted grocery store checkout lines for 25 years. Some people thought the natural landform was evidence of life on Mars, and that us scientists wanted to hide it, but really, the defenders of the NASA budget wish there was ancient civilization on Mars. We decided to take another shot just to make sure we weren't wrong, on April 5, 1998. 
import pandas as pd


def obtener_dataframe(data):
    """Turn a list of (possibly nested) JSON-like records into a DataFrame.

    Each element of ``data`` is flattened with :func:`flatten_json`, so every
    record becomes one row and every flattened key becomes one column.

    Parameters
    ----------
    data : iterable
        Records to tabulate, e.g. the list of dicts returned by a
        ``transformers`` NER pipeline or loaded with ``json.load``.

    Returns
    -------
    pandas.DataFrame
        One row per record of ``data``.
    """
    data_flattened = [flatten_json(class_info) for class_info in data]
    return pd.DataFrame(data_flattened)


# "flatten_json" adapted from
# https://levelup.gitconnected.com/a-deep-dive-into-nested-json-to-data-frame-with-python-69bdabb41938
# (Renu Khandelwal, Jul 23, 2023)
def flatten_json(y):
    """Flatten nested dicts/lists into a single-level dict.

    Nested keys are joined with ``'_'``; list items contribute their position
    as a key part, e.g. ``{"a": {"b": 1}, "c": [5]}`` becomes
    ``{"a_b": 1, "c_0": 5}``. A bare scalar is stored under the empty key
    ``''``. Empty dicts and empty lists contribute no entries.

    Parameters
    ----------
    y : dict | list | scalar
        The JSON-like value to flatten.

    Returns
    -------
    dict
        Mapping from the joined key path to the leaf value.

    Notes
    -----
    The previous version wrapped the body in a ``try`` whose first handler
    referenced ``json.JSONDecodeError`` although ``json`` is not imported in
    this cell, so any real error surfaced as a ``NameError``; the remaining
    handlers printed a message and returned ``None``, silently producing
    broken rows downstream. Errors (e.g. ``RecursionError`` on pathologically
    deep input) now propagate to the caller instead.
    """
    out = {}

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                # str() so that non-string keys (e.g. ints) do not raise TypeError.
                flatten(value, name + str(key) + '_')
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, name + str(position) + '_')
        else:
            # Drop the trailing '_' separator added by the enclosing level.
            out[name[:-1]] = x

    flatten(y)
    return out
# %% Token-level NER with FacebookAI/xlm-roberta-large-finetuned-conll03-english
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")

# Raw sub-word pieces (no special tokens); kept because they are handy for
# inspecting the predictions token by token.
tokens = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tokens)

# Build the model input through the tokenizer itself so the sequence carries
# the special start/end tokens and an attention mask, i.e. the same input
# format the "ner" pipeline feeds to the model. The previous
# `torch.tensor([ids])` omitted the special tokens.
encoding = tokenizer(text, return_tensors="pt", return_special_tokens_mask=True)
special_mask = encoding.pop("special_tokens_mask")[0].tolist()
input_ids = encoding["input_ids"]

# NOTE(review): `trust_remote_code=True` allows execution of code shipped with
# a checkpoint. It is kept because a later cell reuses `model_args`, but it
# should only be used with repositories you trust.
model_args = {"trust_remote_code": True}
model = AutoModelForTokenClassification.from_pretrained(
    "FacebookAI/xlm-roberta-large-finetuned-conll03-english", **model_args
)

with torch.no_grad():
    logits = model(**encoding).logits
predicted_token_class_ids = logits.argmax(-1)

# One label per real sub-word; special-token positions are skipped so the list
# should stay aligned with `tokens`.
predicted_tokens_classes = [
    model.config.id2label[class_id.item()]
    for class_id, is_special in zip(predicted_token_class_ids[0], special_mask)
    if not is_special
]

# The "labels" are the model's own predictions, so this loss only measures how
# confident the model is in its argmax choices; it is NOT an evaluation
# against gold annotations. It is computed from the logits already available
# instead of running a second full forward pass of the large model.
labels = predicted_token_class_ids
loss = torch.nn.functional.cross_entropy(
    logits.view(-1, model.config.num_labels), labels.view(-1)
)
print('loss:', round(loss.item(), 2))
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'I-ORG', 'score': 0.9999913, 'index': 8, 'word': '▁NASA', 'start': 16, 'end': 20}, {'entity': 'I-MISC', 'score': 0.9999641, 'index': 23, 'word': '▁Face', 'start': 88, 'end': 92}, {'entity': 'I-MISC', 'score': 0.99989665, 'index': 24, 'word': '▁On', 'start': 93, 'end': 95}, {'entity': 'I-MISC', 'score': 0.97350365, 'index': 25, 'word': '▁Mars', 'start': 96, 'end': 100}, {'entity': 'I-LOC', 'score': 0.9999362, 'index': 36, 'word': '▁Mars', 'start': 152, 'end': 156}, {'entity': 'I-MISC', 'score': 0.9992086, 'index': 58, 'word': '▁Viking', 'start': 240, 'end': 246}, {'entity': 'I-MISC', 'score': 0.9989502, 'index': 59, 'word': '▁1', 'start': 247, 'end': 248}, {'entity': 'I-MISC', 'score': 0.999977, 'index': 97, 'word': '▁Marti', 'start': 407, 'end': 412}, {'entity': 'I-MISC', 'score': 0.99619055, 'index': 98, 'word': 'an', 'start': 412, 'end': 414}, {'entity': 'I-LOC', 'score': 0.9999354, 'index': 103, 'word': '▁Cy', 'start': 435, 'end': 437}, {'entity': 'I-LOC', 'score': 0.99994576, 'index': 104, 'word': 'do', 'start': 437, 'end': 439}, {'entity': 'I-LOC', 'score': 0.99992585, 'index': 105, 'word': 'nia', 'start': 439, 'end': 442}, {'entity': 'I-MISC', 'score': 0.9999789, 'index': 119, 'word': '▁Egypt', 'start': 496, 'end': 501}, {'entity': 'I-MISC', 'score': 0.9614088, 'index': 120, 'word': 'ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.99997246, 'index': 193, 'word': '▁NASA', 'start': 801, 'end': 805}, {'entity': 'I-LOC', 'score': 0.99979633, 'index': 205, 'word': '▁Mars', 'start': 843, 'end': 847}, {'entity': 'I-LOC', 'score': 0.9998061, 
# %% NER pipeline for FacebookAI/xlm-roberta-large-finetuned-conll03-english
import json
from transformers import pipeline

tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
classifier = pipeline("ner", model=model, tokenizer=tokenizer)
# `salida` is a list with one dict per tagged sub-word
# (entity, score, index, word, start, end).
salida = classifier(text)
print(salida)

# %% Entity counts for the Facebook model
# The saved JSON file is not written anywhere in this notebook. If it is
# missing (fresh checkout / Restart & Run All), fall back to the pipeline
# output computed above instead of failing with FileNotFoundError.
try:
    with open("metricas_Facebook.json", encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print("metricas_Facebook.json not found; using the in-memory pipeline output instead.")
    data = salida
aux = obtener_dataframe(data)
print(aux.groupby(['entity']).size())
aux.groupby(['entity', 'word']).size()

# %% seqeval report
#pip install seqeval
# NOTE(review): `datasets.load_metric` is deprecated in favour of
# `evaluate.load("seqeval")`; migrate once the `evaluate` package is available.
from datasets import load_dataset, load_metric
metric = load_metric("seqeval", **model_args)
# CAUTION: predictions are compared against themselves because no gold labels
# exist for this text, so every score is 1.0 by construction. Only the
# per-type "number" (count of predicted entity spans) is informative.
metric.compute(predictions=[predicted_tokens_classes], references=[predicted_tokens_classes])
trying to load has model type `liltrobertalike` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'liltrobertalike'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[52], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForMaskedLM\n\u001b[1;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForMaskedLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmanu/lilt-infoxlm-base\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:523\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquantization_config\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 521\u001b[0m _ \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquantization_config\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 523\u001b[0m config, kwargs \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 524\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 525\u001b[0m return_unused_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 526\u001b[0m trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code,\n\u001b[0;32m 527\u001b[0m code_revision\u001b[38;5;241m=\u001b[39mcode_revision,\n\u001b[0;32m 528\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 529\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhub_kwargs,\n\u001b[0;32m 530\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 531\u001b[0m )\n\u001b[0;32m 533\u001b[0m \u001b[38;5;66;03m# if torch_dtype=auto was passed here, ensure to pass it 
on\u001b[39;00m\n\u001b[0;32m 534\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs_orig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch_dtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
# %% Attempt to load manu/lilt-infoxlm-base
from transformers import AutoModelForMaskedLM

# The recorded run shows this checkpoint declaring model type
# `liltrobertalike`, which Transformers does not recognise, so
# `from_pretrained` raises ValueError. Catch and display it so the failure is
# documented without aborting "Restart Kernel -> Run All". On failure `model`
# keeps whatever value it had before this cell.
try:
    model = AutoModelForMaskedLM.from_pretrained("manu/lilt-infoxlm-base")
except ValueError as err:
    print(f"Could not load manu/lilt-infoxlm-base: {err}")
# %% NER with projecte-aina/DEBERTA_CIEL
from transformers import pipeline

pipe = pipeline("ner", model="projecte-aina/DEBERTA_CIEL")
# aggregation_strategy="simple" merges sub-word pieces into entity groups, so
# each result dict has `entity_group`, `score`, `word`, `start` and `end`.
ner_entity_results = pipe(text, aggregation_strategy="simple")
print(ner_entity_results)

# %% Entity counts for DEBERTA_CIEL
# The saved JSON file is not written anywhere in this notebook. If it is
# missing, fall back to the pipeline output computed above instead of failing
# with FileNotFoundError.
try:
    with open("3 metricas projecte-ainaDEBERTA_CIEL.json", encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print("'3 metricas projecte-ainaDEBERTA_CIEL.json' not found; using the in-memory pipeline output instead.")
    data = ner_entity_results
aux = obtener_dataframe(data)
print(aux.groupby(['entity_group']).size())
aux.groupby(['entity_group', 'word']).size()
# %% NER with gunghio/distilbert-base-multilingual-cased-finetuned-conll2003-ner
from transformers import pipeline

pipe = pipeline("token-classification", model="gunghio/distilbert-base-multilingual-cased-finetuned-conll2003-ner")
# aggregation_strategy="simple" merges sub-word pieces into entity groups, so
# each result dict has `entity_group`, `score`, `word`, `start` and `end`.
ner_entity_results = pipe(text, aggregation_strategy="simple")
print(ner_entity_results)

# %% Entity counts for the gunghio DistilBERT model
# The saved JSON file is not written anywhere in this notebook. If it is
# missing, fall back to the pipeline output computed above instead of failing
# with FileNotFoundError.
try:
    with open("4 metricas gunghio distilbert-base-multilingual-cased-finetuned-conll2003-ner.json", encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print("Saved gunghio metrics file not found; using the in-memory pipeline output instead.")
    data = ner_entity_results
aux = obtener_dataframe(data)
print(aux.groupby(['entity_group']).size())
aux.groupby(['entity_group', 'word']).size()
"[{'entity': 'ok',\n", " 'score': 0.9418772,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'ok',\n", " 'score': 0.9061194,\n", " 'index': 3,\n", " 'word': 'if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'ok',\n", " 'score': 0.9935272,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'ok',\n", " 'score': 0.9791702,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'ok',\n", " 'score': 0.99081236,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'ok',\n", " 'score': 0.979658,\n", " 'index': 7,\n", " 'word': 'a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'ok',\n", " 'score': 0.7008045,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'ok',\n", " 'score': 0.9583886,\n", " 'index': 9,\n", " 'word': 'scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'ok',\n", " 'score': 0.9387937,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'typo',\n", " 'score': 0.955443,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'typo',\n", " 'score': 0.5262742,\n", " 'index': 12,\n", " 'word': 'should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'ok',\n", " 'score': 0.943428,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'ok',\n", " 'score': 0.9880336,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'ok',\n", " 'score': 0.99831665,\n", " 'index': 15,\n", " 'word': 'to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'ok',\n", " 'score': 0.57461846,\n", " 'index': 16,\n", " 'word': 'tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'ok',\n", " 'score': 0.9945175,\n", " 'index': 17,\n", " 'word': 'me',\n", " 'start': 59,\n", " 'end': 
61},\n", " {'entity': 'ok',\n", " 'score': 0.810129,\n", " 'index': 18,\n", " 'word': 'the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'ok',\n", " 'score': 0.5944688,\n", " 'index': 19,\n", " 'word': 'whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'ok',\n", " 'score': 0.9751041,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'ok',\n", " 'score': 0.8432575,\n", " 'index': 21,\n", " 'word': 'about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'typo',\n", " 'score': 0.93727547,\n", " 'index': 22,\n", " 'word': 'the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'typo',\n", " 'score': 0.9830071,\n", " 'index': 23,\n", " 'word': 'Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'ok',\n", " 'score': 0.9957579,\n", " 'index': 24,\n", " 'word': 'On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'ok',\n", " 'score': 0.98799103,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'typo',\n", " 'score': 0.951169,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'typo',\n", " 'score': 0.9631144,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'typo',\n", " 'score': 0.99146336,\n", " 'index': 28,\n", " 'word': 'obvious',\n", " 'start': 108,\n", " 'end': 115},\n", " {'entity': 'ok',\n", " 'score': 0.9865329,\n", " 'index': 29,\n", " 'word': '##ly',\n", " 'start': 115,\n", " 'end': 117},\n", " {'entity': 'ok',\n", " 'score': 0.99190086,\n", " 'index': 30,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'ok',\n", " 'score': 0.87074775,\n", " 'index': 31,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'ok',\n", " 'score': 0.99500775,\n", " 'index': 32,\n", " 'word': 'that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'typo',\n", " 'score': 0.94334555,\n", " 
'index': 33,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'ok',\n", " 'score': 0.8655428,\n", " 'index': 34,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'typo',\n", " 'score': 0.5147766,\n", " 'index': 35,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'ok',\n", " 'score': 0.9896828,\n", " 'index': 36,\n", " 'word': 'on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'ok',\n", " 'score': 0.94172686,\n", " 'index': 37,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'typo',\n", " 'score': 0.92494434,\n", " 'index': 38,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'ok',\n", " 'score': 0.9805345,\n", " 'index': 39,\n", " 'word': 'and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'ok',\n", " 'score': 0.8247318,\n", " 'index': 40,\n", " 'word': 'that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'typo',\n", " 'score': 0.8335082,\n", " 'index': 41,\n", " 'word': 'the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'typo',\n", " 'score': 0.984109,\n", " 'index': 42,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'ok',\n", " 'score': 0.9346752,\n", " 'index': 43,\n", " 'word': 'was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'typo',\n", " 'score': 0.61687183,\n", " 'index': 44,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'ok',\n", " 'score': 0.9703255,\n", " 'index': 45,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'typo',\n", " 'score': 0.6155792,\n", " 'index': 46,\n", " 'word': 'alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': 'ok',\n", " 'score': 0.93030524,\n", " 'index': 47,\n", " 'word': '##s',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'typo',\n", " 'score': 0.8034546,\n", " 'index': 48,\n", " 'word': ',',\n", " 'start': 197,\n", 
" 'end': 198},\n", " {'entity': 'ok',\n", " 'score': 0.8684226,\n", " 'index': 49,\n", " 'word': 'correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'ok',\n", " 'score': 0.9899316,\n", " 'index': 50,\n", " 'word': '?',\n", " 'start': 206,\n", " 'end': 207},\n", " {'entity': 'typo',\n", " 'score': 0.68965435,\n", " 'index': 51,\n", " 'word': '\"',\n", " 'start': 207,\n", " 'end': 208},\n", " {'entity': 'typo',\n", " 'score': 0.5541892,\n", " 'index': 52,\n", " 'word': 'No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'ok',\n", " 'score': 0.99887687,\n", " 'index': 53,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'ok',\n", " 'score': 0.9993892,\n", " 'index': 54,\n", " 'word': 'twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'ok',\n", " 'score': 0.9983181,\n", " 'index': 55,\n", " 'word': 'five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'ok',\n", " 'score': 0.9524137,\n", " 'index': 56,\n", " 'word': 'years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 'ok',\n", " 'score': 0.98393893,\n", " 'index': 57,\n", " 'word': 'ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'ok',\n", " 'score': 0.877359,\n", " 'index': 58,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'typo',\n", " 'score': 0.8272593,\n", " 'index': 59,\n", " 'word': 'our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'ok',\n", " 'score': 0.88372874,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'ok',\n", " 'score': 0.99642414,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'ok',\n", " 'score': 0.9792823,\n", " 'index': 62,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'ok',\n", " 'score': 0.7188466,\n", " 'index': 63,\n", " 'word': 'was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'ok',\n", " 'score': 
0.6053355,\n", " 'index': 64,\n", " 'word': 'ci',\n", " 'start': 264,\n", " 'end': 266},\n", " {'entity': 'ok',\n", " 'score': 0.98061955,\n", " 'index': 65,\n", " 'word': '##rc',\n", " 'start': 266,\n", " 'end': 268},\n", " {'entity': 'ok',\n", " 'score': 0.9918943,\n", " 'index': 66,\n", " 'word': '##ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'ok',\n", " 'score': 0.9991246,\n", " 'index': 67,\n", " 'word': 'the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'ok',\n", " 'score': 0.99520606,\n", " 'index': 68,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'typo',\n", " 'score': 0.9603083,\n", " 'index': 69,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'typo',\n", " 'score': 0.9765087,\n", " 'index': 70,\n", " 'word': 'sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'ok',\n", " 'score': 0.9886219,\n", " 'index': 71,\n", " 'word': '##pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'ok',\n", " 'score': 0.99900526,\n", " 'index': 72,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'typo',\n", " 'score': 0.8964089,\n", " 'index': 73,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'typo',\n", " 'score': 0.8211978,\n", " 'index': 74,\n", " 'word': 'when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'typo',\n", " 'score': 0.9168602,\n", " 'index': 75,\n", " 'word': 'it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'ok',\n", " 'score': 0.9347477,\n", " 'index': 76,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'ok',\n", " 'score': 0.9530431,\n", " 'index': 77,\n", " 'word': 'the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'typo',\n", " 'score': 0.52226573,\n", " 'index': 78,\n", " 'word': 'sh',\n", " 'start': 322,\n", " 'end': 324},\n", " {'entity': 'ok',\n", " 'score': 0.98722374,\n", " 'index': 79,\n", " 'word': 
'##adow',\n", " 'start': 324,\n", " 'end': 328},\n", " {'entity': 'ok',\n", " 'score': 0.9884067,\n", " 'index': 80,\n", " 'word': '##y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'ok',\n", " 'score': 0.99751353,\n", " 'index': 81,\n", " 'word': 'like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'ok',\n", " 'score': 0.98541266,\n", " 'index': 82,\n", " 'word': '##ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'ok',\n", " 'score': 0.99096996,\n", " 'index': 83,\n", " 'word': 'of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'ok',\n", " 'score': 0.9794129,\n", " 'index': 84,\n", " 'word': 'a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'ok',\n", " 'score': 0.9906974,\n", " 'index': 85,\n", " 'word': 'human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'ok',\n", " 'score': 0.99601525,\n", " 'index': 86,\n", " 'word': 'face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'typo',\n", " 'score': 0.80661726,\n", " 'index': 87,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'typo',\n", " 'score': 0.8332319,\n", " 'index': 88,\n", " 'word': 'Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'ok',\n", " 'score': 0.9995962,\n", " 'index': 89,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'typo',\n", " 'score': 0.83959967,\n", " 'index': 90,\n", " 'word': 'figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'ok',\n", " 'score': 0.99624974,\n", " 'index': 91,\n", " 'word': '##d',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'ok',\n", " 'score': 0.9980217,\n", " 'index': 92,\n", " 'word': 'out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'ok',\n", " 'score': 0.5464159,\n", " 'index': 93,\n", " 'word': 'that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'typo',\n", " 'score': 0.95522094,\n", " 'index': 94,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", 
" {'entity': 'ok',\n", " 'score': 0.53848577,\n", " 'index': 95,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'typo',\n", " 'score': 0.9315185,\n", " 'index': 96,\n", " 'word': 'just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'ok',\n", " 'score': 0.92040026,\n", " 'index': 97,\n", " 'word': 'another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'typo',\n", " 'score': 0.5492458,\n", " 'index': 98,\n", " 'word': 'Mart',\n", " 'start': 407,\n", " 'end': 411},\n", " {'entity': 'ok',\n", " 'score': 0.91541255,\n", " 'index': 99,\n", " 'word': '##ian',\n", " 'start': 411,\n", " 'end': 414},\n", " {'entity': 'typo',\n", " 'score': 0.9870064,\n", " 'index': 100,\n", " 'word': 'mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'typo',\n", " 'score': 0.9882908,\n", " 'index': 101,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'typo',\n", " 'score': 0.94008124,\n", " 'index': 102,\n", " 'word': 'common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'ok',\n", " 'score': 0.99831474,\n", " 'index': 103,\n", " 'word': 'around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'ok',\n", " 'score': 0.9985098,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'ok',\n", " 'score': 0.95086056,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'ok',\n", " 'score': 0.99512345,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'ok',\n", " 'score': 0.74133,\n", " 'index': 107,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'ok',\n", " 'score': 0.976595,\n", " 'index': 108,\n", " 'word': 'only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'ok',\n", " 'score': 0.999059,\n", " 'index': 109,\n", " 'word': 'this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'ok',\n", " 'score': 
0.9988238,\n", " 'index': 110,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'ok',\n", " 'score': 0.8208375,\n", " 'index': 111,\n", " 'word': 'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'typo',\n", " 'score': 0.70970184,\n", " 'index': 112,\n", " 'word': 'sh',\n", " 'start': 462,\n", " 'end': 464},\n", " {'entity': 'ok',\n", " 'score': 0.96421015,\n", " 'index': 113,\n", " 'word': '##adow',\n", " 'start': 464,\n", " 'end': 468},\n", " {'entity': 'ok',\n", " 'score': 0.9639605,\n", " 'index': 114,\n", " 'word': '##s',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'ok',\n", " 'score': 0.90302604,\n", " 'index': 115,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'ok',\n", " 'score': 0.6680046,\n", " 'index': 116,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'ok',\n", " 'score': 0.9925667,\n", " 'index': 117,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'typo',\n", " 'score': 0.5095613,\n", " 'index': 118,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'ok',\n", " 'score': 0.9739361,\n", " 'index': 119,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'ok',\n", " 'score': 0.9018896,\n", " 'index': 120,\n", " 'word': 'an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'typo',\n", " 'score': 0.5793097,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'ok',\n", " 'score': 0.9924223,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'ok',\n", " 'score': 0.9970651,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'ok',\n", " 'score': 0.99351513,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'ok',\n", " 'score': 0.98944503,\n", " 'index': 125,\n", " 'word': 
'##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'typo',\n", " 'score': 0.44638777,\n", " 'index': 126,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'typo',\n", " 'score': 0.9027144,\n", " 'index': 127,\n", " 'word': 'Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'ok',\n", " 'score': 0.95206773,\n", " 'index': 128,\n", " 'word': 'few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'ok',\n", " 'score': 0.88961726,\n", " 'index': 129,\n", " 'word': 'days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'ok',\n", " 'score': 0.9765072,\n", " 'index': 130,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'typo',\n", " 'score': 0.884694,\n", " 'index': 131,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'typo',\n", " 'score': 0.9981382,\n", " 'index': 132,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'typo',\n", " 'score': 0.8012792,\n", " 'index': 133,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'ok',\n", " 'score': 0.91450936,\n", " 'index': 134,\n", " 'word': 'the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'typo',\n", " 'score': 0.9744159,\n", " 'index': 135,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'typo',\n", " 'score': 0.9916164,\n", " 'index': 136,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'typo',\n", " 'score': 0.5908708,\n", " 'index': 137,\n", " 'word': 'all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'ok',\n", " 'score': 0.99475324,\n", " 'index': 138,\n", " 'word': 'to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'ok',\n", " 'score': 0.8057313,\n", " 'index': 139,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'typo',\n", " 'score': 0.61359376,\n", " 'index': 140,\n", " 'word': ',',\n", " 'start': 571,\n", " 
'end': 572},\n", " {'entity': 'ok',\n", " 'score': 0.99861836,\n", " 'index': 141,\n", " 'word': 'and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'ok',\n", " 'score': 0.6784175,\n", " 'index': 142,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'ok',\n", " 'score': 0.9465173,\n", " 'index': 143,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'ok',\n", " 'score': 0.9947103,\n", " 'index': 144,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'ok',\n", " 'score': 0.99907136,\n", " 'index': 145,\n", " 'word': 'to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'ok',\n", " 'score': 0.98897797,\n", " 'index': 146,\n", " 'word': 'note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'ok',\n", " 'score': 0.9916495,\n", " 'index': 147,\n", " 'word': 'that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'typo',\n", " 'score': 0.8916498,\n", " 'index': 148,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'ok',\n", " 'score': 0.59980196,\n", " 'index': 149,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'typo',\n", " 'score': 0.6458891,\n", " 'index': 150,\n", " 'word': 'a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'ok',\n", " 'score': 0.86706686,\n", " 'index': 151,\n", " 'word': 'huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'ok',\n", " 'score': 0.74633974,\n", " 'index': 152,\n", " 'word': 'rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'ok',\n", " 'score': 0.9891533,\n", " 'index': 153,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'ok',\n", " 'score': 0.8717077,\n", " 'index': 154,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'typo',\n", " 'score': 0.99484986,\n", " 'index': 155,\n", " 'word': 'just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'typo',\n", 
" 'score': 0.5499501,\n", " 'index': 156,\n", " 'word': 'res',\n", " 'start': 642,\n", " 'end': 645},\n", " {'entity': 'ok',\n", " 'score': 0.89776593,\n", " 'index': 157,\n", " 'word': '##emble',\n", " 'start': 645,\n", " 'end': 650},\n", " {'entity': 'ok',\n", " 'score': 0.9908867,\n", " 'index': 158,\n", " 'word': '##d',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'ok',\n", " 'score': 0.747778,\n", " 'index': 159,\n", " 'word': 'a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'ok',\n", " 'score': 0.9308926,\n", " 'index': 160,\n", " 'word': 'human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'ok',\n", " 'score': 0.8658663,\n", " 'index': 161,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'ok',\n", " 'score': 0.9994103,\n", " 'index': 162,\n", " 'word': 'and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'ok',\n", " 'score': 0.99929607,\n", " 'index': 163,\n", " 'word': 'face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'ok',\n", " 'score': 0.86389965,\n", " 'index': 164,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'ok',\n", " 'score': 0.99792165,\n", " 'index': 165,\n", " 'word': 'but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'ok',\n", " 'score': 0.9912469,\n", " 'index': 166,\n", " 'word': 'all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'ok',\n", " 'score': 0.9983467,\n", " 'index': 167,\n", " 'word': 'of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'ok',\n", " 'score': 0.9972486,\n", " 'index': 168,\n", " 'word': 'it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'ok',\n", " 'score': 0.99832314,\n", " 'index': 169,\n", " 'word': 'was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'ok',\n", " 'score': 0.9965006,\n", " 'index': 170,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'ok',\n", " 'score': 0.9985544,\n", " 'index': 171,\n", " 'word': 
'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'ok',\n", " 'score': 0.98051447,\n", " 'index': 172,\n", " 'word': 'sh',\n", " 'start': 703,\n", " 'end': 705},\n", " {'entity': 'ok',\n", " 'score': 0.9919695,\n", " 'index': 173,\n", " 'word': '##adow',\n", " 'start': 705,\n", " 'end': 709},\n", " {'entity': 'ok',\n", " 'score': 0.986326,\n", " 'index': 174,\n", " 'word': '##s',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'typo',\n", " 'score': 0.79459554,\n", " 'index': 175,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'typo',\n", " 'score': 0.99913234,\n", " 'index': 176,\n", " 'word': 'We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'ok',\n", " 'score': 0.9165677,\n", " 'index': 177,\n", " 'word': 'only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'ok',\n", " 'score': 0.9994357,\n", " 'index': 178,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'ok',\n", " 'score': 0.9980812,\n", " 'index': 179,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'ok',\n", " 'score': 0.996609,\n", " 'index': 180,\n", " 'word': 'because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'ok',\n", " 'score': 0.7698178,\n", " 'index': 181,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'ok',\n", " 'score': 0.9095254,\n", " 'index': 182,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'ok',\n", " 'score': 0.9074025,\n", " 'index': 183,\n", " 'word': 'it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'ok',\n", " 'score': 0.94705737,\n", " 'index': 184,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'ok',\n", " 'score': 0.8691749,\n", " 'index': 185,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'ok',\n", " 'score': 0.7643121,\n", " 'index': 186,\n", " 'word': 'a',\n", " 'start': 764,\n", " 'end': 765},\n", 
" {'entity': 'ok',\n", " 'score': 0.92725027,\n", " 'index': 187,\n", " 'word': 'good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'ok',\n", " 'score': 0.98407435,\n", " 'index': 188,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'ok',\n", " 'score': 0.9118401,\n", " 'index': 189,\n", " 'word': 'to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'typo',\n", " 'score': 0.80820084,\n", " 'index': 190,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'ok',\n", " 'score': 0.9221322,\n", " 'index': 191,\n", " 'word': 'the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'ok',\n", " 'score': 0.8702925,\n", " 'index': 192,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'ok',\n", " 'score': 0.96217585,\n", " 'index': 193,\n", " 'word': 'with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'ok',\n", " 'score': 0.5905169,\n", " 'index': 194,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'ok',\n", " 'score': 0.52509785,\n", " 'index': 195,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'typo',\n", " 'score': 0.62146723,\n", " 'index': 196,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'typo',\n", " 'score': 0.79807454,\n", " 'index': 197,\n", " 'word': 'findings',\n", " 'start': 808,\n", " 'end': 816},\n", " {'entity': 'typo',\n", " 'score': 0.99402726,\n", " 'index': 198,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'ok',\n", " 'score': 0.49941942,\n", " 'index': 199,\n", " 'word': 'and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'typo',\n", " 'score': 0.9993143,\n", " 'index': 200,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'ok',\n", " 'score': 0.9600053,\n", " 'index': 201,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'ok',\n", " 'score': 
0.98252696,\n", " 'index': 202,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'ok',\n", " 'score': 0.9993383,\n", " 'index': 203,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'ok',\n", " 'score': 0.8149123,\n", " 'index': 204,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'ok',\n", " 'score': 0.928141,\n", " 'index': 205,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'typo',\n", " 'score': 0.98144484,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'typo',\n", " 'score': 0.9430255,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'ok',\n", " 'score': 0.9964923,\n", " 'index': 208,\n", " 'word': 'and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'ok',\n", " 'score': 0.9620715,\n", " 'index': 209,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'ok',\n", " 'score': 0.9969836,\n", " 'index': 210,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'ok',\n", " 'score': 0.42969742,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'ok',\n", " 'score': 0.7296629,\n", " 'index': 212,\n", " 'word': 'The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'ok',\n", " 'score': 0.6601624,\n", " 'index': 213,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'ok',\n", " 'score': 0.9987452,\n", " 'index': 214,\n", " 'word': 'on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'ok',\n", " 'score': 0.995214,\n", " 'index': 215,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'typo',\n", " 'score': 0.76099324,\n", " 'index': 216,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'ok',\n", " 'score': 0.93993735,\n", " 'index': 217,\n", " 'word': 
'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'ok',\n", " 'score': 0.76330084,\n", " 'index': 218,\n", " 'word': 'a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'typo',\n", " 'score': 0.7646893,\n", " 'index': 219,\n", " 'word': 'pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'ok',\n", " 'score': 0.99559265,\n", " 'index': 220,\n", " 'word': 'i',\n", " 'start': 898,\n", " 'end': 899},\n", " {'entity': 'ok',\n", " 'score': 0.99413526,\n", " 'index': 221,\n", " 'word': '##con',\n", " 'start': 899,\n", " 'end': 902},\n", " {'entity': 'ok',\n", " 'score': 0.7378378,\n", " 'index': 222,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'typo',\n", " 'score': 0.90965664,\n", " 'index': 223,\n", " 'word': 'shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'ok',\n", " 'score': 0.998401,\n", " 'index': 224,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'ok',\n", " 'score': 0.99020493,\n", " 'index': 225,\n", " 'word': 'movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'ok',\n", " 'score': 0.8732322,\n", " 'index': 226,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'typo',\n", " 'score': 0.9965013,\n", " 'index': 227,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'ok',\n", " 'score': 0.99827015,\n", " 'index': 228,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'ok',\n", " 'score': 0.94189245,\n", " 'index': 229,\n", " 'word': 'books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'ok',\n", " 'score': 0.99873203,\n", " 'index': 230,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'ok',\n", " 'score': 0.788286,\n", " 'index': 231,\n", " 'word': 'magazines',\n", " 'start': 939,\n", " 'end': 948},\n", " {'entity': 'ok',\n", " 'score': 0.9985929,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 
949},\n", " {'entity': 'typo',\n", " 'score': 0.83558345,\n", " 'index': 233,\n", " 'word': 'radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'ok',\n", " 'score': 0.9941037,\n", " 'index': 234,\n", " 'word': 'talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'ok',\n", " 'score': 0.9955338,\n", " 'index': 235,\n", " 'word': 'shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'typo',\n", " 'score': 0.8871708,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'typo',\n", " 'score': 0.5271952,\n", " 'index': 237,\n", " 'word': 'and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'typo',\n", " 'score': 0.99819934,\n", " 'index': 238,\n", " 'word': 'hau',\n", " 'start': 972,\n", " 'end': 975},\n", " {'entity': 'ok',\n", " 'score': 0.8323784,\n", " 'index': 239,\n", " 'word': '##nted',\n", " 'start': 975,\n", " 'end': 979},\n", " {'entity': 'typo',\n", " 'score': 0.97368705,\n", " 'index': 240,\n", " 'word': 'gr',\n", " 'start': 980,\n", " 'end': 982},\n", " {'entity': 'ok',\n", " 'score': 0.54475385,\n", " 'index': 241,\n", " 'word': '##oce',\n", " 'start': 982,\n", " 'end': 985},\n", " {'entity': 'ok',\n", " 'score': 0.9955764,\n", " 'index': 242,\n", " 'word': '##ry',\n", " 'start': 985,\n", " 'end': 987},\n", " {'entity': 'ok',\n", " 'score': 0.9983236,\n", " 'index': 243,\n", " 'word': 'store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'ok',\n", " 'score': 0.7288064,\n", " 'index': 244,\n", " 'word': 'check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'ok',\n", " 'score': 0.9966523,\n", " 'index': 245,\n", " 'word': '##out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'ok',\n", " 'score': 0.99845624,\n", " 'index': 246,\n", " 'word': 'lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'typo',\n", " 'score': 0.6110894,\n", " 'index': 247,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 
'ok',\n", " 'score': 0.7403333,\n", " 'index': 248,\n", " 'word': '25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'ok',\n", " 'score': 0.90443635,\n", " 'index': 249,\n", " 'word': 'years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'typo',\n", " 'score': 0.7376988,\n", " 'index': 250,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'typo',\n", " 'score': 0.7852515,\n", " 'index': 251,\n", " 'word': 'Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'ok',\n", " 'score': 0.76370704,\n", " 'index': 252,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'typo',\n", " 'score': 0.62583363,\n", " 'index': 253,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'ok',\n", " 'score': 0.5030125,\n", " 'index': 254,\n", " 'word': 'the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'typo',\n", " 'score': 0.720193,\n", " 'index': 255,\n", " 'word': 'natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'ok',\n", " 'score': 0.9988502,\n", " 'index': 256,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'ok',\n", " 'score': 0.97257465,\n", " 'index': 257,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'ok',\n", " 'score': 0.8562199,\n", " 'index': 258,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'typo',\n", " 'score': 0.8256486,\n", " 'index': 259,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'ok',\n", " 'score': 0.99360114,\n", " 'index': 260,\n", " 'word': 'of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'ok',\n", " 'score': 0.8346857,\n", " 'index': 261,\n", " 'word': 'life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'ok',\n", " 'score': 0.982291,\n", " 'index': 262,\n", " 'word': 'on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 
'ok',\n", " 'score': 0.97399634,\n", " 'index': 263,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'typo',\n", " 'score': 0.90118784,\n", " 'index': 264,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'ok',\n", " 'score': 0.59052175,\n", " 'index': 265,\n", " 'word': 'and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'typo',\n", " 'score': 0.9531032,\n", " 'index': 266,\n", " 'word': 'that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'typo',\n", " 'score': 0.5538794,\n", " 'index': 267,\n", " 'word': 'us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'ok',\n", " 'score': 0.996549,\n", " 'index': 268,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 1116},\n", " {'entity': 'typo',\n", " 'score': 0.7577664,\n", " 'index': 269,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'ok',\n", " 'score': 0.94258755,\n", " 'index': 270,\n", " 'word': 'to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'typo',\n", " 'score': 0.901848,\n", " 'index': 271,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'ok',\n", " 'score': 0.87200266,\n", " 'index': 272,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'typo',\n", " 'score': 0.93054014,\n", " 'index': 273,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'ok',\n", " 'score': 0.7310256,\n", " 'index': 274,\n", " 'word': 'but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'ok',\n", " 'score': 0.98565906,\n", " 'index': 275,\n", " 'word': 'really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'ok',\n", " 'score': 0.94660145,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'typo',\n", " 'score': 0.9548484,\n", " 'index': 277,\n", " 'word': 'the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 
'ok',\n", " 'score': 0.85286355,\n", " 'index': 278,\n", " 'word': 'defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'ok',\n", " 'score': 0.919405,\n", " 'index': 279,\n", " 'word': '##s',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'ok',\n", " 'score': 0.97006005,\n", " 'index': 280,\n", " 'word': 'of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'ok',\n", " 'score': 0.929363,\n", " 'index': 281,\n", " 'word': 'the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'ok',\n", " 'score': 0.7226242,\n", " 'index': 282,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'typo',\n", " 'score': 0.8097878,\n", " 'index': 283,\n", " 'word': 'budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'ok',\n", " 'score': 0.95355994,\n", " 'index': 284,\n", " 'word': 'wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'typo',\n", " 'score': 0.9236313,\n", " 'index': 285,\n", " 'word': 'there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'ok',\n", " 'score': 0.7190513,\n", " 'index': 286,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'ok',\n", " 'score': 0.66554,\n", " 'index': 287,\n", " 'word': 'ancient',\n", " 'start': 1196,\n", " 'end': 1203},\n", " {'entity': 'ok',\n", " 'score': 0.60441756,\n", " 'index': 288,\n", " 'word': 'civili',\n", " 'start': 1204,\n", " 'end': 1210},\n", " {'entity': 'typo',\n", " 'score': 0.6450192,\n", " 'index': 289,\n", " 'word': '##zation',\n", " 'start': 1210,\n", " 'end': 1216},\n", " {'entity': 'ok',\n", " 'score': 0.6214468,\n", " 'index': 290,\n", " 'word': 'on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'ok',\n", " 'score': 0.9483295,\n", " 'index': 291,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'typo',\n", " 'score': 0.7586299,\n", " 'index': 292,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 
'typo',\n", " 'score': 0.8658028,\n", " 'index': 293,\n", " 'word': 'We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'typo',\n", " 'score': 0.65636414,\n", " 'index': 294,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'ok',\n", " 'score': 0.997926,\n", " 'index': 295,\n", " 'word': 'to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'typo',\n", " 'score': 0.5479873,\n", " 'index': 296,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'ok',\n", " 'score': 0.99164516,\n", " 'index': 297,\n", " 'word': 'another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'ok',\n", " 'score': 0.9741786,\n", " 'index': 298,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'typo',\n", " 'score': 0.98279405,\n", " 'index': 299,\n", " 'word': 'just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'ok',\n", " 'score': 0.86857635,\n", " 'index': 300,\n", " 'word': 'to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'ok',\n", " 'score': 0.5942953,\n", " 'index': 301,\n", " 'word': 'make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'ok',\n", " 'score': 0.9985331,\n", " 'index': 302,\n", " 'word': 'sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'typo',\n", " 'score': 0.7710986,\n", " 'index': 303,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'ok',\n", " 'score': 0.6990816,\n", " 'index': 304,\n", " 'word': 'were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'ok',\n", " 'score': 0.8634561,\n", " 'index': 305,\n", " 'word': '##n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'ok',\n", " 'score': 0.99818283,\n", " 'index': 306,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'ok',\n", " 'score': 0.99803716,\n", " 'index': 307,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'ok',\n", " 
'score': 0.9882159,\n", " 'index': 308,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'typo',\n", " 'score': 0.88255525,\n", " 'index': 309,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'typo',\n", " 'score': 0.9208057,\n", " 'index': 310,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'typo',\n", " 'score': 0.91730064,\n", " 'index': 311,\n", " 'word': 'April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'ok',\n", " 'score': 0.5459038,\n", " 'index': 312,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'ok',\n", " 'score': 0.5843325,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'typo',\n", " 'score': 0.8402537,\n", " 'index': 314,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 'typo',\n", " 'score': 0.9408526,\n", " 'index': 315,\n", " 'word': '.',\n", " 'start': 1310,\n", " 'end': 1311},\n", " {'entity': 'typo',\n", " 'score': 0.9976394,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'ok',\n", " 'score': 0.96120375,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'ok',\n", " 'score': 0.923244,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'ok',\n", " 'score': 0.97935665,\n", " 'index': 319,\n", " 'word': 'and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'typo',\n", " 'score': 0.7523317,\n", " 'index': 320,\n", " 'word': 'his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'typo',\n", " 'score': 0.91937953,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'ok',\n", " 'score': 0.8556037,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'ok',\n", " 'score': 
0.84279835,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'typo',\n", " 'score': 0.9986338,\n", " 'index': 324,\n", " 'word': 'camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'ok',\n", " 'score': 0.90062076,\n", " 'index': 325,\n", " 'word': 'team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'typo',\n", " 'score': 0.91712064,\n", " 'index': 326,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'ok',\n", " 'score': 0.785382,\n", " 'index': 327,\n", " 'word': 'a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'typo',\n", " 'score': 0.9040174,\n", " 'index': 328,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'ok',\n", " 'score': 0.9086188,\n", " 'index': 329,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'typo',\n", " 'score': 0.65382206,\n", " 'index': 330,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'ok',\n", " 'score': 0.95172024,\n", " 'index': 331,\n", " 'word': 'ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'ok',\n", " 'score': 0.9873766,\n", " 'index': 332,\n", " 'word': 'times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'ok',\n", " 'score': 0.67130685,\n", " 'index': 333,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'ok',\n", " 'score': 0.9719016,\n", " 'index': 334,\n", " 'word': '##er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'ok',\n", " 'score': 0.99000204,\n", " 'index': 335,\n", " 'word': 'than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'ok',\n", " 'score': 0.93401396,\n", " 'index': 336,\n", " 'word': 'the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'ok',\n", " 'score': 0.77596074,\n", " 'index': 337,\n", " 'word': 'original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'typo',\n", " 
'score': 0.7519366,\n", " 'index': 338,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'ok',\n", " 'score': 0.66892004,\n", " 'index': 339,\n", " 'word': 'photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'typo',\n", " 'score': 0.98212206,\n", " 'index': 340,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'typo',\n", " 'score': 0.99039525,\n", " 'index': 341,\n", " 'word': 'reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'ok',\n", " 'score': 0.53409153,\n", " 'index': 342,\n", " 'word': '##ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'typo',\n", " 'score': 0.86396545,\n", " 'index': 343,\n", " 'word': 'a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'typo',\n", " 'score': 0.6633433,\n", " 'index': 344,\n", " 'word': 'natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'ok',\n", " 'score': 0.9973527,\n", " 'index': 345,\n", " 'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'ok',\n", " 'score': 0.8840458,\n", " 'index': 346,\n", " 'word': '##form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'typo',\n", " 'score': 0.9950237,\n", " 'index': 347,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'typo',\n", " 'score': 0.9553996,\n", " 'index': 348,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'typo',\n", " 'score': 0.9745846,\n", " 'index': 349,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'typo',\n", " 'score': 0.7203086,\n", " 'index': 350,\n", " 'word': 'no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'ok',\n", " 'score': 0.91405845,\n", " 'index': 351,\n", " 'word': 'alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'ok',\n", " 'score': 0.99879634,\n", " 'index': 352,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 
'typo',\n", " 'score': 0.56567025,\n", " 'index': 353,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'typo',\n", " 'score': 0.971668,\n", " 'index': 354,\n", " 'word': '\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'ok',\n", " 'score': 0.9891775,\n", " 'index': 355,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'ok',\n", " 'score': 0.9991333,\n", " 'index': 356,\n", " 'word': 'that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'ok',\n", " 'score': 0.9964754,\n", " 'index': 357,\n", " 'word': 'picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'ok',\n", " 'score': 0.89596707,\n", " 'index': 358,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'ok',\n", " 'score': 0.986717,\n", " 'index': 359,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'ok',\n", " 'score': 0.98859054,\n", " 'index': 360,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'ok',\n", " 'score': 0.9508471,\n", " 'index': 361,\n", " 'word': 'very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'ok',\n", " 'score': 0.9967194,\n", " 'index': 362,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'ok',\n", " 'score': 0.69285977,\n", " 'index': 363,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'ok',\n", " 'score': 0.9915514,\n", " 'index': 364,\n", " 'word': 'all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'ok',\n", " 'score': 0.5899336,\n", " 'index': 365,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'ok',\n", " 'score': 0.5996167,\n", " 'index': 366,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'typo',\n", " 'score': 0.8670836,\n", " 'index': 367,\n", " 'word': 'could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'ok',\n", " 'score': 
0.9761153,\n", " 'index': 368,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'typo',\n", " 'score': 0.7312487,\n", " 'index': 369,\n", " 'word': 'alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'ok',\n", " 'score': 0.8062085,\n", " 'index': 370,\n", " 'word': 'marking',\n", " 'start': 1562,\n", " 'end': 1569},\n", " {'entity': 'typo',\n", " 'score': 0.80552965,\n", " 'index': 371,\n", " 'word': '##s',\n", " 'start': 1569,\n", " 'end': 1570},\n", " {'entity': 'typo',\n", " 'score': 0.97620255,\n", " 'index': 372,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'typo',\n", " 'score': 0.91833067,\n", " 'index': 373,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'typo',\n", " 'score': 0.8934933,\n", " 'index': 374,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'typo',\n", " 'score': 0.9554798,\n", " 'index': 375,\n", " 'word': 'ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': 'ok',\n", " 'score': 0.6562142,\n", " 'index': 376,\n", " 'word': '##ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'ok',\n", " 'score': 0.64348394,\n", " 'index': 377,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'ok',\n", " 'score': 0.949757,\n", " 'index': 378,\n", " 'word': 'Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'ok',\n", " 'score': 0.99965537,\n", " 'index': 379,\n", " 'word': 'no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'ok',\n", " 'score': 0.9982284,\n", " 'index': 380,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'typo',\n", " 'score': 0.7986995,\n", " 'index': 381,\n", " 'word': 'ye',\n", " 'start': 1601,\n", " 'end': 1603},\n", " {'entity': 'ok',\n", " 'score': 0.9877543,\n", " 'index': 382,\n", " 'word': '##s',\n", " 'start': 1603,\n", " 'end': 1604},\n", " {'entity': 'ok',\n", " 'score': 
0.9989477,\n", " 'index': 383,\n", " 'word': 'that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': 'typo',\n", " 'score': 0.8178804,\n", " 'index': 384,\n", " 'word': 'rum',\n", " 'start': 1610,\n", " 'end': 1613},\n", " {'entity': 'ok',\n", " 'score': 0.9868613,\n", " 'index': 385,\n", " 'word': '##or',\n", " 'start': 1613,\n", " 'end': 1615},\n", " {'entity': 'ok',\n", " 'score': 0.90991825,\n", " 'index': 386,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'typo',\n", " 'score': 0.5242705,\n", " 'index': 387,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'ok',\n", " 'score': 0.9543154,\n", " 'index': 388,\n", " 'word': 'but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'ok',\n", " 'score': 0.86853844,\n", " 'index': 389,\n", " 'word': 'to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'ok',\n", " 'score': 0.7432325,\n", " 'index': 390,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'ok',\n", " 'score': 0.9467291,\n", " 'index': 391,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'typo',\n", " 'score': 0.7128569,\n", " 'index': 392,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'ok',\n", " 'score': 0.62498444,\n", " 'index': 393,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'typo',\n", " 'score': 0.6839772,\n", " 'index': 394,\n", " 'word': 'April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'ok',\n", " 'score': 0.68691665,\n", " 'index': 395,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'ok',\n", " 'score': 0.84025437,\n", " 'index': 396,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'typo',\n", " 'score': 0.9157925,\n", " 'index': 397,\n", " 'word': '2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'typo',\n", " 'score': 
0.7844509,\n", " 'index': 398,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'typo',\n", " 'score': 0.5589368,\n", " 'index': 399,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'ok',\n", " 'score': 0.99429286,\n", " 'index': 400,\n", " 'word': 'to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'typo',\n", " 'score': 0.84421164,\n", " 'index': 401,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'ok',\n", " 'score': 0.978843,\n", " 'index': 402,\n", " 'word': 'another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'ok',\n", " 'score': 0.59987646,\n", " 'index': 403,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'typo',\n", " 'score': 0.7050702,\n", " 'index': 404,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'ok',\n", " 'score': 0.95890915,\n", " 'index': 405,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'ok',\n", " 'score': 0.99975187,\n", " 'index': 406,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'ok',\n", " 'score': 0.98025346,\n", " 'index': 407,\n", " 'word': 'it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'ok',\n", " 'score': 0.83194023,\n", " 'index': 408,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'typo',\n", " 'score': 0.9467474,\n", " 'index': 409,\n", " 'word': 'a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'typo',\n", " 'score': 0.91021883,\n", " 'index': 410,\n", " 'word': 'cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'ok',\n", " 'score': 0.9859971,\n", " 'index': 411,\n", " 'word': '##less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'typo',\n", " 'score': 0.98307884,\n", " 'index': 412,\n", " 'word': 'summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'ok',\n", " 
'score': 0.994193,\n", " 'index': 413,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'typo',\n", " 'score': 0.4313977,\n", " 'index': 414,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'typo',\n", " 'score': 0.96405447,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'ok',\n", " 'score': 0.6155722,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'ok',\n", " 'score': 0.9290005,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'ok',\n", " 'score': 0.90629065,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'ok',\n", " 'score': 0.868706,\n", " 'index': 419,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'ok',\n", " 'score': 0.8017526,\n", " 'index': 420,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'ok',\n", " 'score': 0.7134167,\n", " 'index': 421,\n", " 'word': 'an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'typo',\n", " 'score': 0.5621726,\n", " 'index': 422,\n", " 'word': 'ama',\n", " 'start': 1770,\n", " 'end': 1773},\n", " {'entity': 'ok',\n", " 'score': 0.7826029,\n", " 'index': 423,\n", " 'word': '##zing',\n", " 'start': 1773,\n", " 'end': 1777},\n", " {'entity': 'ok',\n", " 'score': 0.99448264,\n", " 'index': 424,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'ok',\n", " 'score': 0.73524755,\n", " 'index': 425,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'ok',\n", " 'score': 0.6591615,\n", " 'index': 426,\n", " 'word': 'the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'typo',\n", " 'score': 0.86533505,\n", " 'index': 427,\n", " 'word': 'camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'ok',\n", " 'score': 
0.90541,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'ok',\n", " 'score': 0.9281528,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'typo',\n", " 'score': 0.8930355,\n", " 'index': 430,\n", " 'word': 'absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'ok',\n", " 'score': 0.9972451,\n", " 'index': 431,\n", " 'word': 'maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'ok',\n", " 'score': 0.999348,\n", " 'index': 432,\n", " 'word': 'revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'typo',\n", " 'score': 0.6555487,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'ok',\n", " 'score': 0.5426304,\n", " 'index': 434,\n", " 'word': 'With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'ok',\n", " 'score': 0.9795935,\n", " 'index': 435,\n", " 'word': 'this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'ok',\n", " 'score': 0.9408998,\n", " 'index': 436,\n", " 'word': 'camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'typo',\n", " 'score': 0.9998209,\n", " 'index': 437,\n", " 'word': 'you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'ok',\n", " 'score': 0.9576567,\n", " 'index': 438,\n", " 'word': 'can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'typo',\n", " 'score': 0.573261,\n", " 'index': 439,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'ok',\n", " 'score': 0.9691068,\n", " 'index': 440,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'ok',\n", " 'score': 0.8217143,\n", " 'index': 441,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'typo',\n", " 'score': 0.82698673,\n", " 'index': 442,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'typo',\n", " 'score': 
0.78578687,\n", " 'index': 443,\n", " 'word': 'a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'typo',\n", " 'score': 0.74662477,\n", " 'index': 444,\n", " 'word': 'digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'ok',\n", " 'score': 0.5583283,\n", " 'index': 445,\n", " 'word': 'image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'typo',\n", " 'score': 0.99453664,\n", " 'index': 446,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'typo',\n", " 'score': 0.99513924,\n", " 'index': 447,\n", " 'word': '3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'ok',\n", " 'score': 0.97854996,\n", " 'index': 448,\n", " 'word': 'times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'ok',\n", " 'score': 0.9518669,\n", " 'index': 449,\n", " 'word': 'bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'ok',\n", " 'score': 0.9959552,\n", " 'index': 450,\n", " 'word': 'than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'ok',\n", " 'score': 0.70804185,\n", " 'index': 451,\n", " 'word': 'the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'ok',\n", " 'score': 0.7743477,\n", " 'index': 452,\n", " 'word': 'pi',\n", " 'start': 1916,\n", " 'end': 1918},\n", " {'entity': 'ok',\n", " 'score': 0.8590469,\n", " 'index': 453,\n", " 'word': '##xel',\n", " 'start': 1918,\n", " 'end': 1921},\n", " {'entity': 'typo',\n", " 'score': 0.88074666,\n", " 'index': 454,\n", " 'word': 'size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'typo',\n", " 'score': 0.977126,\n", " 'index': 455,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'typo',\n", " 'score': 0.71839124,\n", " 'index': 456,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'ok',\n", " 'score': 0.7110502,\n", " 'index': 457,\n", " 'word': 'if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'ok',\n", " 'score': 
0.72081035,\n", " 'index': 458,\n", " 'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'ok',\n", " 'score': 0.90700245,\n", " 'index': 459,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'ok',\n", " 'score': 0.9401142,\n", " 'index': 460,\n", " 'word': 'any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'ok',\n", " 'score': 0.8010327,\n", " 'index': 461,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'ok',\n", " 'score': 0.9774201,\n", " 'index': 462,\n", " 'word': 'of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'ok',\n", " 'score': 0.72982603,\n", " 'index': 463,\n", " 'word': 'life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'typo',\n", " 'score': 0.98334414,\n", " 'index': 464,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'typo',\n", " 'score': 0.9922644,\n", " 'index': 465,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'ok',\n", " 'score': 0.9828555,\n", " 'index': 466,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'ok',\n", " 'score': 0.9977181,\n", " 'index': 467,\n", " 'word': 'easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'ok',\n", " 'score': 0.99048984,\n", " 'index': 468,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'ok',\n", " 'score': 0.99270344,\n", " 'index': 469,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'ok',\n", " 'score': 0.7724791,\n", " 'index': 470,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'ok',\n", " 'score': 0.9829512,\n", " 'index': 471,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'typo',\n", " 'score': 0.9201129,\n", " 'index': 472,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'typo',\n", " 'score': 
0.51953334,\n", " 'index': 473,\n", " 'word': 'What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'ok',\n", " 'score': 0.9992943,\n", " 'index': 474,\n", " 'word': 'the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'ok',\n", " 'score': 0.98957855,\n", " 'index': 475,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'typo',\n", " 'score': 0.58303297,\n", " 'index': 476,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'typo',\n", " 'score': 0.8085752,\n", " 'index': 477,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'typo',\n", " 'score': 0.9929074,\n", " 'index': 478,\n", " 'word': 'the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'typo',\n", " 'score': 0.9818252,\n", " 'index': 479,\n", " 'word': 'but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'ok',\n", " 'score': 0.5864657,\n", " 'index': 480,\n", " 'word': '##te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'ok',\n", " 'score': 0.9995679,\n", " 'index': 481,\n", " 'word': 'or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'ok',\n", " 'score': 0.8257528,\n", " 'index': 482,\n", " 'word': 'mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'typo',\n", " 'score': 0.78061324,\n", " 'index': 483,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'typo',\n", " 'score': 0.529569,\n", " 'index': 484,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'typo',\n", " 'score': 0.98165315,\n", " 'index': 485,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'ok',\n", " 'score': 0.7917957,\n", " 'index': 486,\n", " 'word': 'land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'ok',\n", " 'score': 0.8484906,\n", " 'index': 487,\n", " 'word': '##form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'typo',\n", " 'score': 
0.5294084,\n", " 'index': 488,\n", " 'word': '##s',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'typo',\n", " 'score': 0.66130173,\n", " 'index': 489,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'typo',\n", " 'score': 0.79395646,\n", " 'index': 490,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'typo',\n", " 'score': 0.80247337,\n", " 'index': 491,\n", " 'word': 'the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'typo',\n", " 'score': 0.90861714,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'ok',\n", " 'score': 0.916328,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "\n", "typo_checker = pipeline(\n", "    \"ner\",\n", "    model=\"mrm8488/distilbert-base-multi-cased-finetuned-typo-detection\",\n", "    tokenizer=\"mrm8488/distilbert-base-multi-cased-finetuned-typo-detection\"\n", ")\n", "\n", "# Label every token of `text` as 'ok' or 'typo'.\n", "result = typo_checker(text)\n", "# The pipeline already strips the special tokens, so the former `result[1:-1]`\n", "# silently dropped the first and last real tokens; show the full list instead.\n", "result\n" ] }, { "cell_type": "code", "execution_count": 63, "id": "7b8311a5-68cc-45cb-b2a8-94495aa18113", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "ok 328\n", "typo 164\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "ok 7\n", " ##adow 3\n", " ##ara 1\n", " ##biter 1\n", " ##con 1\n", " ..\n", "typo when 1\n", " which 4\n", " wrong 1\n", " ye 1\n", " you 3\n", "Length: 298, dtype: int64" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Imported here so the cell also runs on a fresh kernel: the top import cell\n", "# only pulls in `JSONEncoder`, not the `json` module used by `json.load`.\n", "import json\n", "\n", "# Read the saved JSON records and flatten them into a DataFrame.\n", "with open(\"5 metricas mrm8488.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Row counts per label, then per (label, word) pair.\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id":
"25e685d8-ff8a-43ec-8ab0-958a89056ee8", "metadata": {}, "source": [ "## 6 sagorsarker/codeswitch-spaeng-ner-lince " ] }, { "cell_type": "code", "execution_count": 128, "id": "5e5dcd1c-f49d-486d-9d72-e0ce4498a565", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at sagorsarker/codeswitch-spaeng-ner-lince were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n", "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.90762955,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'B-LOC',\n", " 'score': 0.45779723,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'B-LOC',\n", " 'score': 0.67780584,\n", " 'index': 37,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'B-PROD',\n", " 'score': 0.823176,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-PROD',\n", " 'score': 0.6758249,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'B-OTHER',\n", " 'score': 0.5148923,\n", " 'index': 98,\n", " 'word': 'Mart',\n", " 'start': 407,\n", " 'end': 411},\n", " {'entity': 'B-OTHER',\n", " 'score': 0.42019445,\n", " 'index': 99,\n", " 'word': '##ian',\n", " 'start': 411,\n", " 'end': 414},\n", " {'entity': 'B-LOC',\n", " 'score': 
0.76582533,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'B-LOC',\n", " 'score': 0.74930793,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'I-LOC',\n", " 'score': 0.34308487,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-OTHER',\n", " 'score': 0.60952294,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'I-OTHER',\n", " 'score': 0.6259159,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9731985,\n", " 'index': 194,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'B-LOC',\n", " 'score': 0.62910813,\n", " 'index': 205,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'B-LOC',\n", " 'score': 0.6685378,\n", " 'index': 215,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'B-LOC',\n", " 'score': 0.71298903,\n", " 'index': 263,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9437394,\n", " 'index': 282,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'B-LOC',\n", " 'score': 0.60219324,\n", " 'index': 291,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'B-TIME',\n", " 'score': 0.9826582,\n", " 'index': 311,\n", " 'word': 'April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'I-TIME',\n", " 'score': 0.9323657,\n", " 'index': 312,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'I-TIME',\n", " 'score': 0.8260366,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'I-TIME',\n", " 'score': 0.887798,\n", " 'index': 314,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 
'B-PER',\n", " 'score': 0.9961196,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.99166673,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PER',\n", " 'score': 0.9911315,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'B-PROD',\n", " 'score': 0.6550399,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-PROD',\n", " 'score': 0.80179524,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-PROD',\n", " 'score': 0.8049131,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'B-OTHER',\n", " 'score': 0.35082105,\n", " 'index': 338,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'B-TIME',\n", " 'score': 0.97395533,\n", " 'index': 394,\n", " 'word': 'April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'I-TIME',\n", " 'score': 0.66215277,\n", " 'index': 395,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'B-GROUP',\n", " 'score': 0.5102115,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'B-GROUP',\n", " 'score': 0.34341586,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'B-LOC',\n", " 'score': 0.8453891,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.663866,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n", "\n", "tokenizer = 
AutoTokenizer.from_pretrained(\"sagorsarker/codeswitch-spaeng-ner-lince\")\n", "\n", "model = AutoModelForTokenClassification.from_pretrained(\"sagorsarker/codeswitch-spaeng-ner-lince\")\n", "\n", "ner_model = pipeline('ner', model=model, tokenizer=tokenizer)\n", "\n", "ner_model(text)" ] }, { "cell_type": "code", "execution_count": 66, "id": "58c5a81d-c1c6-48db-b26d-edd1e01d1231", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-GROUP 2\n", "B-LOC 9\n", "B-ORG 3\n", "B-OTHER 4\n", "B-PER 1\n", "B-PROD 2\n", "B-TIME 2\n", "I-LOC 2\n", "I-OTHER 1\n", "I-PER 2\n", "I-PROD 3\n", "I-TIME 4\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-GROUP ##n 1\n", " Mali 1\n", "B-LOC ##yd 1\n", " American 1\n", " C 1\n", " Mars 6\n", "B-ORG NASA 3\n", "B-OTHER ##ian 1\n", " Egypt 1\n", " Mart 1\n", " Viking 1\n", "B-PER Michael 1\n", "B-PROD Mars 1\n", " Viking 1\n", "B-TIME April 2\n", "I-LOC ##onia 1\n", " West 1\n", "I-OTHER ##ion 1\n", "I-PER ##n 1\n", " Mali 1\n", "I-PROD ##biter 1\n", " 1 1\n", " Or 1\n", "I-TIME , 1\n", " 1998 1\n", " 5 1\n", " 8 1\n", "dtype: int64" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Imported here so the cell also runs on a fresh kernel: the top import cell\n", "# only pulls in `JSONEncoder`, not the `json` module used by `json.load`.\n", "import json\n", "\n", "# Read the saved JSON records and flatten them into a DataFrame.\n", "with open(\"6 sagorsarkercodeswitch-spaeng-ner-lince.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Row counts per label, then per (label, word) pair.\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "9c02ef71-62d9-40f1-98a8-138419cff895", "metadata": {}, "source": [ "## 7 gunghio/xlm-roberta-base-finetuned-panx-ner" ] }, { "cell_type": "code", "execution_count": 68, "id": "a8df4657-5842-43c0-97b8-15346f5a4578", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'entity_group': 'ORG', 'score': 0.9839335, 'word': 'NASA', 'start': 16, 'end': 20}, {'entity_group': 'ORG', 'score': 0.9257912, 'word': 'FaceOnMars,', 'start': 88, 'end':
101}, {'entity_group': 'LOC', 'score': 0.8524113, 'word': 'Mars,', 'start': 152, 'end': 157}, {'entity_group': 'ORG', 'score': 0.9470773, 'word': 'Viking1spacecraft', 'start': 240, 'end': 259}, {'entity_group': 'ORG', 'score': 0.6181985, 'word': 'Martian', 'start': 407, 'end': 414}, {'entity_group': 'LOC', 'score': 0.49310815, 'word': 'mesa,', 'start': 415, 'end': 420}, {'entity_group': 'LOC', 'score': 0.8700732, 'word': 'Cydonia,', 'start': 435, 'end': 443}, {'entity_group': 'ORG', 'score': 0.57100993, 'word': 'EgyptionPharaoh.', 'start': 496, 'end': 513}, {'entity_group': 'LOC', 'score': 0.46428245, 'word': 'formation', 'start': 622, 'end': 631}, {'entity_group': 'ORG', 'score': 0.90882486, 'word': \"NASA's\", 'start': 801, 'end': 807}, {'entity_group': 'LOC', 'score': 0.5685518, 'word': 'Mars--', 'start': 843, 'end': 849}, {'entity_group': 'LOC', 'score': 0.89254224, 'word': 'Mars', 'start': 875, 'end': 879}, {'entity_group': 'LOC', 'score': 0.8768112, 'word': 'Mars,', 'start': 1088, 'end': 1093}, {'entity_group': 'ORG', 'score': 0.88662714, 'word': 'NASAbudget', 'start': 1169, 'end': 1180}, {'entity_group': 'ORG', 'score': 0.5386654, 'word': 'ancientcivilization', 'start': 1196, 'end': 1216}, {'entity_group': 'LOC', 'score': 0.6887058, 'word': 'Mars.', 'start': 1220, 'end': 1225}, {'entity_group': 'PER', 'score': 0.86852753, 'word': 'MichaelMalin', 'start': 1312, 'end': 1325}, {'entity_group': 'ORG', 'score': 0.9244041, 'word': 'MarsOrbitercamerateam', 'start': 1334, 'end': 1358}, {'entity_group': 'ORG', 'score': 0.65044224, 'word': 'Vikingphotos,', 'start': 1419, 'end': 1433}, {'entity_group': 'ORG', 'score': 0.6091294, 'word': 'digitalimage,', 'start': 1877, 'end': 1891}, {'entity_group': 'LOC', 'score': 0.93108547, 'word': 'AmericanWest.', 'start': 2094, 'end': 2108}]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\transformers\\pipelines\\token_classification.py:392: UserWarning: Tokenizer does 
not support real words, using fallback heuristic\n", " warnings.warn(\n" ] } ], "source": [ "from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n", "\n", "# The same checkpoint id is used for both the tokenizer and the model.\n", "checkpoint = \"gunghio/xlm-roberta-base-finetuned-panx-ner\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n", "\n", "# With an aggregation strategy the pipeline returns grouped entities\n", "# (records keyed by 'entity_group') instead of one record per token.\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer, aggregation_strategy=\"first\")\n", "example = text\n", "\n", "ner_results = nlp(example)\n", "print(ner_results)" ] }, { "cell_type": "code", "execution_count": 69, "id": "15eb816d-ac29-448d-8520-fd3cd2d3d957", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity_group\n", "LOC 9\n", "ORG 11\n", "PER 1\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity_group word \n", "LOC AmericanWest. 1\n", " Cydonia, 1\n", " Mars 1\n", " Mars, 2\n", " Mars-- 1\n", " Mars. 1\n", " formation 1\n", " mesa, 1\n", "ORG EgyptionPharaoh. 
1\n", " FaceOnMars, 1\n", " MarsOrbitercamerateam 1\n", " Martian 1\n", " NASA 1\n", " NASA's 1\n", " NASAbudget 1\n", " Viking1spacecraft 1\n", " Vikingphotos, 1\n", " ancientcivilization 1\n", " digitalimage, 1\n", "PER MichaelMalin 1\n", "dtype: int64" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the saved predictions of model 7 and flatten them into a DataFrame.\n", "with open(\"7 gunghioxlm-roberta-base-finetuned-panx-ner.json\", encoding='utf-8') as fh:\n", "    data = json.load(fh)\n", "aux = obtener_dataframe(data)\n", "\n", "# Count predictions per entity group, then per (group, word) pair.\n", "print(aux.groupby('entity_group').size())\n", "aux.groupby(['entity_group', 'word']).size()" ] }, { "cell_type": "markdown", "id": "7a419ca8-e000-4bf0-81fa-de8970e98a22", "metadata": {}, "source": [ "## 8 51la5/roberta-large-NER" ] }, { "cell_type": "code", "execution_count": 71, "id": "92d64393-7a73-4803-9fa5-043de160cd45", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "Some weights of the model checkpoint at 51la5/roberta-large-NER were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'I-ORG',\n", " 'score': 0.9999913,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9999641,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'I-MISC',\n", " 'score': 0.99989665,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'I-MISC',\n", " 'score': 0.97350365,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9999362,\n", " 'index': 36,\n", " 'word': '▁Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9992086,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9989502,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-MISC',\n", " 'score': 0.999977,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'I-MISC',\n", " 'score': 0.99619055,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9999354,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'I-LOC',\n", " 'score': 0.99994576,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 
'I-LOC',\n", " 'score': 0.99992585,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9999789,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9614088,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-ORG',\n", " 'score': 0.99997246,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'I-LOC',\n", " 'score': 0.99979633,\n", " 'index': 205,\n", " 'word': '▁Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9998061,\n", " 'index': 215,\n", " 'word': '▁Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'I-LOC',\n", " 'score': 0.99984956,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'I-ORG',\n", " 'score': 0.99996305,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9998203,\n", " 'index': 295,\n", " 'word': '▁Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'I-PER',\n", " 'score': 0.9999932,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.99999106,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'I-MISC',\n", " 'score': 0.94105357,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9839579,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9913346,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9759228,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 
1344,\n", " 'end': 1346},\n", " {'entity': 'I-MISC',\n", " 'score': 0.999749,\n", " 'index': 341,\n", " 'word': '▁Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'I-PER',\n", " 'score': 0.9999914,\n", " 'index': 416,\n", " 'word': '▁Malin',\n", " 'start': 1745,\n", " 'end': 1750},\n", " {'entity': 'I-MISC',\n", " 'score': 0.92417294,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.99954396,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "\n", "# The same checkpoint id is used for both the tokenizer and the model.\n", "checkpoint = \"51la5/roberta-large-NER\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n", "\n", "# No aggregation strategy is passed, so the result has one record per tagged token.\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "classifier(text)" ] }, { "cell_type": "code", "execution_count": 72, "id": "9cf28bd0-f78c-48f1-98f0-e6b8dea07e21", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "I-LOC 9\n", "I-MISC 15\n", "I-ORG 3\n", "I-PER 3\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "I-LOC do 1\n", " nia 1\n", " ▁Cy 1\n", " ▁Mars 5\n", " ▁West 1\n", "I-MISC an 1\n", " bit 1\n", " er 1\n", " ion 1\n", " ▁1 1\n", " ▁American 1\n", " ▁Egypt 1\n", " ▁Face 1\n", " ▁Mars 2\n", " ▁Marti 1\n", " ▁On 1\n", " ▁Or 1\n", " ▁Viking 2\n", "I-ORG ▁NASA 3\n", "I-PER ▁Malin 2\n", " ▁Michael 1\n", "dtype: int64" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the saved predictions of model 8 and flatten them into a DataFrame.\n", "with open(\"8 51la5roberta-large-NER.json\", encoding='utf-8') as fh:\n", "    data = json.load(fh)\n", "aux = obtener_dataframe(data)\n", "\n", "# Count predictions per entity label, then per (label, word) pair.\n", "print(aux.groupby('entity').size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "c6a46082-0ffa-413c-ae69-4ffbf2081e71", 
"metadata": {}, "source": [ "## 9 dmargutierrezdistilbert-base-multilingual-cased-mapa_coarse-ner" ] }, { "cell_type": "code", "execution_count": 74, "id": "e832ae0c-3ecd-4080-aeee-70b0fac8db0b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ADDRESS',\n", " 'score': 0.89953065,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'I-ADDRESS',\n", " 'score': 0.776557,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'I-ADDRESS',\n", " 'score': 0.87639356,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-AMOUNT',\n", " 'score': 0.94337094,\n", " 'index': 248,\n", " 'word': '25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'I-AMOUNT',\n", " 'score': 0.83502764,\n", " 'index': 249,\n", " 'word': 'years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'B-DATE',\n", " 'score': 0.993107,\n", " 'index': 311,\n", " 'word': 'April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'I-DATE',\n", " 'score': 0.9911287,\n", " 'index': 312,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'I-DATE',\n", " 'score': 0.9842742,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'I-DATE',\n", " 'score': 0.9907127,\n", " 'index': 314,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 'B-PERSON',\n", " 'score': 0.9272426,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PERSON',\n", " 'score': 0.9829417,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PERSON',\n", " 'score': 0.96556324,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'B-ORGANISATION',\n", " 'score': 0.92684674,\n", " 'index': 321,\n", " 'word': 'Mars',\n", 
" 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-ORGANISATION',\n", " 'score': 0.9111312,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-ORGANISATION',\n", " 'score': 0.8044608,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'B-DATE',\n", " 'score': 0.9925527,\n", " 'index': 394,\n", " 'word': 'April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'I-DATE',\n", " 'score': 0.989077,\n", " 'index': 395,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'I-DATE',\n", " 'score': 0.9804143,\n", " 'index': 396,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'I-DATE',\n", " 'score': 0.989312,\n", " 'index': 397,\n", " 'word': '2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'B-PERSON',\n", " 'score': 0.6016297,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'I-PERSON',\n", " 'score': 0.7576901,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'B-ADDRESS',\n", " 'score': 0.48142406,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-ADDRESS',\n", " 'score': 0.456201,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"dmargutierrez/distilbert-base-multilingual-cased-mapa_coarse-ner\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"dmargutierrez/distilbert-base-multilingual-cased-mapa_coarse-ner\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "classifier(text)" ] }, { "cell_type": "code", "execution_count": 75, "id": "3464c046-a851-4cb6-a7fe-5a71e03b2c70", "metadata": 
{}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-ADDRESS 2\n", "B-AMOUNT 1\n", "B-DATE 2\n", "B-ORGANISATION 1\n", "B-PERSON 2\n", "I-ADDRESS 3\n", "I-AMOUNT 1\n", "I-DATE 6\n", "I-ORGANISATION 2\n", "I-PERSON 3\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-ADDRESS American 1\n", " C 1\n", "B-AMOUNT 25 1\n", "B-DATE April 2\n", "B-ORGANISATION Mars 1\n", "B-PERSON Mali 1\n", " Michael 1\n", "I-ADDRESS ##onia 1\n", " ##yd 1\n", " West 1\n", "I-AMOUNT years 1\n", "I-DATE , 2\n", " 1998 1\n", " 2001 1\n", " 5 1\n", " 8 1\n", "I-ORGANISATION ##biter 1\n", " Or 1\n", "I-PERSON ##n 2\n", " Mali 1\n", "dtype: int64" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"9 dmargutierrezdistilbert-base-multilingual-cased-mapa_coarse-ner.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "d464a1a6-7472-4961-907c-a58292fb42b2", "metadata": {}, "source": [ "## 10 mbruton/spa_enpt_mBERT" ] }, { "cell_type": "code", "execution_count": 77, "id": "038960d7-d348-4025-b336-acd0b7a6827b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'r0:arg1|tem',\n", " 'score': 0.8560656,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'r0:root',\n", " 'score': 0.99593973,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'r1:arg1|tem',\n", " 'score': 0.7426193,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'r1:root',\n", " 'score': 0.94838774,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'r1:arg2|atr',\n", " 'score': 0.64391094,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", 
" {'entity': 'r2:root',\n", " 'score': 0.80442923,\n", " 'index': 16,\n", " 'word': 'tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'r1:arg1|pat',\n", " 'score': 0.33438408,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'r4:arg1|tem',\n", " 'score': 0.34810358,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'r5:root',\n", " 'score': 0.5292147,\n", " 'index': 30,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'r4:arg2|atr',\n", " 'score': 0.46503437,\n", " 'index': 31,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'r6:arg2|atr',\n", " 'score': 0.07937594,\n", " 'index': 33,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'r6:root',\n", " 'score': 0.55574423,\n", " 'index': 34,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'r5:arg1|tem',\n", " 'score': 0.38110724,\n", " 'index': 35,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.16291595,\n", " 'index': 42,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'r7:root',\n", " 'score': 0.6100769,\n", " 'index': 44,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'r7:arg0|agt',\n", " 'score': 0.15812016,\n", " 'index': 45,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'r6:arg0|agt',\n", " 'score': 0.18646379,\n", " 'index': 62,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'r7:root',\n", " 'score': 0.6611128,\n", " 'index': 64,\n", " 'word': 'ci',\n", " 'start': 264,\n", " 'end': 266},\n", " {'entity': 'r7:root',\n", " 'score': 0.5775215,\n", " 'index': 65,\n", " 'word': '##rc',\n", " 'start': 266,\n", " 'end': 268},\n", " {'entity': 'r7:root',\n", " 'score': 0.35779056,\n", " 'index': 66,\n", " 
'word': '##ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.39385104,\n", " 'index': 68,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'r7:root',\n", " 'score': 0.42999077,\n", " 'index': 70,\n", " 'word': 'sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'r8:root',\n", " 'score': 0.26388708,\n", " 'index': 71,\n", " 'word': '##pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.2658881,\n", " 'index': 72,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.19982801,\n", " 'index': 75,\n", " 'word': 'it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'r8:root',\n", " 'score': 0.3022762,\n", " 'index': 76,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.08930407,\n", " 'index': 81,\n", " 'word': 'like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.09168834,\n", " 'index': 82,\n", " 'word': '##ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.054486226,\n", " 'index': 88,\n", " 'word': 'Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.10709534,\n", " 'index': 89,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'r9:root',\n", " 'score': 0.18592234,\n", " 'index': 90,\n", " 'word': 'figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'r9:root',\n", " 'score': 0.15209025,\n", " 'index': 91,\n", " 'word': '##d',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.045862645,\n", " 'index': 94,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'r9:root',\n", " 'score': 0.115167715,\n", " 'index': 95,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " 
{'entity': 'r9:root',\n", " 'score': 0.04374532,\n", " 'index': 102,\n", " 'word': 'common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.040294692,\n", " 'index': 110,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'r9:root',\n", " 'score': 0.1052284,\n", " 'index': 111,\n", " 'word': 'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04526653,\n", " 'index': 112,\n", " 'word': 'sh',\n", " 'start': 462,\n", " 'end': 464},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.050974954,\n", " 'index': 113,\n", " 'word': '##adow',\n", " 'start': 464,\n", " 'end': 468},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.04716369,\n", " 'index': 115,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'r9:root',\n", " 'score': 0.12286918,\n", " 'index': 116,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'r9:root',\n", " 'score': 0.034002524,\n", " 'index': 117,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'r9:root',\n", " 'score': 0.085215025,\n", " 'index': 118,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'r9:root',\n", " 'score': 0.04240514,\n", " 'index': 119,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'r9:root',\n", " 'score': 0.04686627,\n", " 'index': 129,\n", " 'word': 'days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'r9:root',\n", " 'score': 0.040811997,\n", " 'index': 130,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'r10:root',\n", " 'score': 0.040206842,\n", " 'index': 132,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'r9:root',\n", " 'score': 0.080649935,\n", " 'index': 133,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'r9:root',\n", " 'score': 0.042861167,\n", " 'index': 
135,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'r9:root',\n", " 'score': 0.042270634,\n", " 'index': 136,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'r9:root',\n", " 'score': 0.08127655,\n", " 'index': 139,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'r9:root',\n", " 'score': 0.044283886,\n", " 'index': 142,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'r9:root',\n", " 'score': 0.08544878,\n", " 'index': 143,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'r9:root',\n", " 'score': 0.040639073,\n", " 'index': 144,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'r9:root',\n", " 'score': 0.0474719,\n", " 'index': 148,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'r9:root',\n", " 'score': 0.078273796,\n", " 'index': 149,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'r9:root',\n", " 'score': 0.060446404,\n", " 'index': 153,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.030689918,\n", " 'index': 154,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'r9:root',\n", " 'score': 0.08887379,\n", " 'index': 156,\n", " 'word': 'res',\n", " 'start': 642,\n", " 'end': 645},\n", " {'entity': 'r9:root',\n", " 'score': 0.07661998,\n", " 'index': 157,\n", " 'word': '##emble',\n", " 'start': 645,\n", " 'end': 650},\n", " {'entity': 'r9:root',\n", " 'score': 0.0793471,\n", " 'index': 158,\n", " 'word': '##d',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.034773763,\n", " 'index': 161,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.034561444,\n", " 'index': 166,\n", " 'word': 'all',\n", " 'start': 679,\n", " 'end': 682},\n", " 
{'entity': 'r9:arg1|pat',\n", " 'score': 0.038585175,\n", " 'index': 168,\n", " 'word': 'it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'r9:root',\n", " 'score': 0.087780945,\n", " 'index': 169,\n", " 'word': 'was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'r9:root',\n", " 'score': 0.0999068,\n", " 'index': 170,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.027370188,\n", " 'index': 171,\n", " 'word': 'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.078722976,\n", " 'index': 176,\n", " 'word': 'We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'r9:root',\n", " 'score': 0.12920254,\n", " 'index': 178,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.07986989,\n", " 'index': 179,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.048153095,\n", " 'index': 181,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'r9:root',\n", " 'score': 0.10950655,\n", " 'index': 182,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.041486118,\n", " 'index': 183,\n", " 'word': 'it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'r9:root',\n", " 'score': 0.05073052,\n", " 'index': 184,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'r9:root',\n", " 'score': 0.10624526,\n", " 'index': 185,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.054641366,\n", " 'index': 188,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'r9:root',\n", " 'score': 0.10240572,\n", " 'index': 190,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.044021852,\n", " 
'index': 192,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'r9:root',\n", " 'score': 0.090464294,\n", " 'index': 200,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'r9:root',\n", " 'score': 0.09293412,\n", " 'index': 201,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'r9:root',\n", " 'score': 0.09475694,\n", " 'index': 202,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.056167223,\n", " 'index': 203,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.03035166,\n", " 'index': 204,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.032432236,\n", " 'index': 209,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'r9:root',\n", " 'score': 0.083309464,\n", " 'index': 210,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.038414363,\n", " 'index': 213,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.02903995,\n", " 'index': 216,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'r9:root',\n", " 'score': 0.07692599,\n", " 'index': 217,\n", " 'word': 'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.051249504,\n", " 'index': 219,\n", " 'word': 'pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04843377,\n", " 'index': 220,\n", " 'word': 'i',\n", " 'start': 898,\n", " 'end': 899},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04545699,\n", " 'index': 221,\n", " 'word': '##con',\n", " 'start': 899,\n", " 'end': 902},\n", " {'entity': 'r9:root',\n", " 'score': 0.08494211,\n", " 'index': 223,\n", " 'word': 'shot',\n", " 'start': 
904,\n", " 'end': 908},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.03157478,\n", " 'index': 224,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'r9:root',\n", " 'score': 0.1036089,\n", " 'index': 227,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.03158019,\n", " 'index': 228,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'r8:argM|tmp',\n", " 'score': 0.091016516,\n", " 'index': 247,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.06676676,\n", " 'index': 252,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'r8:root',\n", " 'score': 0.20488475,\n", " 'index': 253,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'r7:arg1|tem',\n", " 'score': 0.36603826,\n", " 'index': 256,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'r7:arg1|tem',\n", " 'score': 0.31067976,\n", " 'index': 257,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'r7:root',\n", " 'score': 0.48778686,\n", " 'index': 258,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'r7:arg2|atr',\n", " 'score': 0.13593948,\n", " 'index': 259,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'r6:arg0|agt',\n", " 'score': 0.21349978,\n", " 'index': 268,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 1116},\n", " {'entity': 'r7:root',\n", " 'score': 0.48215854,\n", " 'index': 269,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'r7:root',\n", " 'score': 0.66229856,\n", " 'index': 271,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.34266004,\n", " 'index': 272,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 
1134},\n", " {'entity': 'r6:arg0|agt',\n", " 'score': 0.27256963,\n", " 'index': 278,\n", " 'word': 'defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'r5:root',\n", " 'score': 0.4016244,\n", " 'index': 286,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'r6:arg0|agt',\n", " 'score': 0.3085603,\n", " 'index': 293,\n", " 'word': 'We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'r7:root',\n", " 'score': 0.68402517,\n", " 'index': 294,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'r7:root',\n", " 'score': 0.6312899,\n", " 'index': 296,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.2447523,\n", " 'index': 298,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'r5:argM|fin',\n", " 'score': 0.21691667,\n", " 'index': 300,\n", " 'word': 'to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.0507591,\n", " 'index': 303,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'r9:root',\n", " 'score': 0.110183075,\n", " 'index': 304,\n", " 'word': 'were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'r9:root',\n", " 'score': 0.0827311,\n", " 'index': 305,\n", " 'word': '##n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.07966802,\n", " 'index': 308,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'r8:argM|tmp',\n", " 'score': 0.047043335,\n", " 'index': 310,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.037761096,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'r9:root',\n", " 'score': 0.11788476,\n", " 'index': 326,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 
'r8:arg1|pat',\n", " 'score': 0.061609197,\n", " 'index': 328,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.04639362,\n", " 'index': 329,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'r9:root',\n", " 'score': 0.10497463,\n", " 'index': 330,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'r9:root',\n", " 'score': 0.037833076,\n", " 'index': 332,\n", " 'word': 'times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'r9:root',\n", " 'score': 0.04543908,\n", " 'index': 333,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'r9:root',\n", " 'score': 0.043668192,\n", " 'index': 334,\n", " 'word': '##er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'r9:root',\n", " 'score': 0.06038779,\n", " 'index': 339,\n", " 'word': 'photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'r9:root',\n", " 'score': 0.08361747,\n", " 'index': 341,\n", " 'word': 'reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'r9:root',\n", " 'score': 0.0819967,\n", " 'index': 342,\n", " 'word': '##ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'r9:root',\n", " 'score': 0.050429754,\n", " 'index': 345,\n", " 'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'r9:root',\n", " 'score': 0.055836514,\n", " 'index': 346,\n", " 'word': '##form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'r10:root',\n", " 'score': 0.030725654,\n", " 'index': 348,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'r9:root',\n", " 'score': 0.07811248,\n", " 'index': 349,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'r9:root',\n", " 'score': 0.056091927,\n", " 'index': 352,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'r10:root',\n", " 'score': 
0.037297357,\n", " 'index': 357,\n", " 'word': 'picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'r9:root',\n", " 'score': 0.08379,\n", " 'index': 358,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'r9:root',\n", " 'score': 0.046028677,\n", " 'index': 362,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'r9:root',\n", " 'score': 0.030750155,\n", " 'index': 363,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.029666642,\n", " 'index': 366,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'r9:root',\n", " 'score': 0.07766737,\n", " 'index': 368,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.037586644,\n", " 'index': 370,\n", " 'word': 'marking',\n", " 'start': 1562,\n", " 'end': 1569},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.039157256,\n", " 'index': 371,\n", " 'word': '##s',\n", " 'start': 1569,\n", " 'end': 1570},\n", " {'entity': 'r9:root',\n", " 'score': 0.06878179,\n", " 'index': 372,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'r10:root',\n", " 'score': 0.06736001,\n", " 'index': 373,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.030598668,\n", " 'index': 374,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'r9:root',\n", " 'score': 0.04365823,\n", " 'index': 381,\n", " 'word': 'ye',\n", " 'start': 1601,\n", " 'end': 1603},\n", " {'entity': 'r9:root',\n", " 'score': 0.04754055,\n", " 'index': 382,\n", " 'word': '##s',\n", " 'start': 1603,\n", " 'end': 1604},\n", " {'entity': 'r9:root',\n", " 'score': 0.04762847,\n", " 'index': 384,\n", " 'word': 'rum',\n", " 'start': 1610,\n", " 'end': 1613},\n", " {'entity': 'r9:root',\n", " 'score': 0.038992863,\n", " 'index': 385,\n", " 
'word': '##or',\n", " 'start': 1613,\n", " 'end': 1615},\n", " {'entity': 'r9:root',\n", " 'score': 0.06939269,\n", " 'index': 386,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'r9:root',\n", " 'score': 0.08153154,\n", " 'index': 390,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.042257246,\n", " 'index': 391,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04214874,\n", " 'index': 392,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'r8:argM|tmp',\n", " 'score': 0.036950577,\n", " 'index': 393,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.033291806,\n", " 'index': 398,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'r9:root',\n", " 'score': 0.0738949,\n", " 'index': 399,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'r9:root',\n", " 'score': 0.08140796,\n", " 'index': 401,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.04987211,\n", " 'index': 403,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'r9:root',\n", " 'score': 0.06430703,\n", " 'index': 405,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.038376,\n", " 'index': 406,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.03541435,\n", " 'index': 407,\n", " 'word': 'it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'r9:root',\n", " 'score': 0.074262,\n", " 'index': 408,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.044221986,\n", " 'index': 410,\n", " 'word': 'cloud',\n", " 
'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.039963495,\n", " 'index': 413,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'r9:root',\n", " 'score': 0.03479836,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'r9:root',\n", " 'score': 0.032437824,\n", " 'index': 419,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'r9:root',\n", " 'score': 0.08305774,\n", " 'index': 420,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'r9:arg1|pat',\n", " 'score': 0.04045077,\n", " 'index': 424,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'r9:root',\n", " 'score': 0.066850476,\n", " 'index': 425,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'r9:root',\n", " 'score': 0.03912363,\n", " 'index': 427,\n", " 'word': 'camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'r9:root',\n", " 'score': 0.0366046,\n", " 'index': 434,\n", " 'word': 'With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'r9:root',\n", " 'score': 0.034650415,\n", " 'index': 437,\n", " 'word': 'you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'r9:root',\n", " 'score': 0.07881917,\n", " 'index': 439,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'r9:root',\n", " 'score': 0.08386986,\n", " 'index': 440,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'r9:root',\n", " 'score': 0.035086773,\n", " 'index': 441,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'r10:root',\n", " 'score': 0.033257537,\n", " 'index': 442,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'r9:root',\n", " 'score': 0.035879184,\n", " 'index': 455,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " 
{'entity': 'r9:root',\n", " 'score': 0.0820557,\n", " 'index': 456,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'r9:root',\n", " 'score': 0.045568023,\n", " 'index': 458,\n", " 'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'r9:root',\n", " 'score': 0.07430672,\n", " 'index': 459,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'r9:root',\n", " 'score': 0.048614994,\n", " 'index': 461,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'r9:root',\n", " 'score': 0.040848035,\n", " 'index': 465,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'r9:root',\n", " 'score': 0.06783971,\n", " 'index': 466,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'r9:root',\n", " 'score': 0.043203566,\n", " 'index': 467,\n", " 'word': 'easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'r9:root',\n", " 'score': 0.07753727,\n", " 'index': 468,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'r9:root',\n", " 'score': 0.03982749,\n", " 'index': 469,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'r9:root',\n", " 'score': 0.050202448,\n", " 'index': 470,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'r9:root',\n", " 'score': 0.07350303,\n", " 'index': 471,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'r9:root',\n", " 'score': 0.041813992,\n", " 'index': 473,\n", " 'word': 'What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'r9:root',\n", " 'score': 0.043242317,\n", " 'index': 475,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'r9:root',\n", " 'score': 0.071942516,\n", " 'index': 476,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'r9:root',\n", " 'score': 
0.065713726,\n", " 'index': 477,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'r9:root',\n", " 'score': 0.047005344,\n", " 'index': 479,\n", " 'word': 'but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'r9:root',\n", " 'score': 0.053594884,\n", " 'index': 480,\n", " 'word': '##te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'r9:root',\n", " 'score': 0.038346287,\n", " 'index': 484,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'r9:root',\n", " 'score': 0.071865395,\n", " 'index': 485,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'r9:root',\n", " 'score': 0.052885562,\n", " 'index': 486,\n", " 'word': 'land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'r9:root',\n", " 'score': 0.058789182,\n", " 'index': 487,\n", " 'word': '##form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'r9:root',\n", " 'score': 0.05849257,\n", " 'index': 488,\n", " 'word': '##s',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'r9:root',\n", " 'score': 0.05117639,\n", " 'index': 489,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'r9:root',\n", " 'score': 0.051266044,\n", " 'index': 490,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089}]" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run the \"ner\" pipeline over `text` with the mbruton/spa_enpt_mBERT checkpoint.\n", "from transformers import pipeline\n", "\n", "model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_enpt_mBERT\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_enpt_mBERT\")\n", "classifier = pipeline(task=\"ner\", tokenizer=tokenizer, model=model)\n", "# Bare last expression: the notebook renders the list of per-token predictions.\n", "classifier(text)" ] }, { "cell_type": "code", "execution_count": 78, "id": "a5c41e30-7978-45e4-9eba-c3eaf8fcd77d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "r0:arg1|tem 1\n", "r0:root 1\n", "r10:root 5\n", "r1:arg1|pat 1\n", 
"r1:arg1|tem 1\n", "r1:arg2|atr 1\n", "r1:root 1\n", "r2:root 1\n", "r4:arg1|tem 1\n", "r4:arg2|atr 1\n", "r5:arg1|tem 1\n", "r5:argM|fin 1\n", "r5:root 2\n", "r6:arg0|agt 4\n", "r6:arg2|atr 1\n", "r6:root 1\n", "r7:arg0|agt 1\n", "r7:arg1|pat 5\n", "r7:arg1|tem 2\n", "r7:arg2|atr 1\n", "r7:root 10\n", "r8:arg0|agt 8\n", "r8:arg1|pat 4\n", "r8:arg1|tem 8\n", "r8:arg2|atr 18\n", "r8:argM|tmp 3\n", "r8:root 3\n", "r9:arg1|pat 12\n", "r9:root 102\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "r0:arg1|tem you 1\n", "r0:root re 1\n", "r10:root hidden 1\n", " in 1\n", " picture 1\n", " ..\n", "r9:root what 1\n", " which 2\n", " would 1\n", " ye 1\n", " you 2\n", "Length: 174, dtype: int64" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the JSON file saved for this model and tally its rows per label.\n", "with open(\"10 mbrutonspa_enpt_mBERT.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "aux = obtener_dataframe(data)\n", "print(aux.groupby(['entity']).size())\n", "# Final expression is the cell result: row count per (entity, word) pair.\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "6da949b6-cae9-409d-a05d-d93f9f634bfd", "metadata": {}, "source": [ "## 11  benjamin/wtp-bert-mini" ] }, { "cell_type": "code", "execution_count": 80, "id": "6b9d327a-5f62-435e-923e-9a51d91c958a", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'bert-char'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[80], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 2\u001b[0m tokenizer 
\u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-mini\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-mini\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m 
config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." 
] } ], "source": [ "from transformers import pipeline\n", "\n", "# The recorded run of this cell failed: the checkpoint declares model type\n", "# `bert-char`, which the installed Transformers release does not recognize, so\n", "# loading raises ValueError. Report that instead of aborting a Restart & Run All.\n", "try:\n", "    tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-bert-mini\")\n", "    model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-bert-mini\")\n", "    classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "except ValueError as error:\n", "    print(f\"benjamin/wtp-bert-mini could not be loaded: {error}\")\n", "else:\n", "    # Only reached when loading succeeded; show the predictions as before.\n", "    display(classifier(text))" ] }, { "cell_type": "markdown", "id": "6b2ab65e-9856-45a9-aa54-c230abfe8d4b", "metadata": {}, "source": [ "## 12 Babelscapewikineural-multilingual-ner" ] }, { "cell_type": "code", "execution_count": 83, "id": "d9c12157-5280-4753-b0e6-b1cd7de813bd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.9951147,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-MISC',\n", " 'score': 0.6191017,\n", " 'index': 23,\n", " 'word': 'Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'I-MISC',\n", " 'score': 0.5708344,\n", " 'index': 24,\n", " 'word': 'On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'I-MISC',\n", " 'score': 0.5786274,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'B-LOC',\n", " 'score': 0.877606,\n", " 'index': 37,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9051992,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9834109,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-MISC',\n", " 'score': 0.48295248,\n", " 'index': 98,\n", " 'word': 'Mart',\n", " 'start': 407,\n", " 'end': 411},\n", " {'entity': 'I-MISC',\n", " 'score': 0.47647634,\n", " 'index': 99,\n", " 'word': '##ian',\n", " 'start': 411,\n", " 'end': 414},\n", " {'entity': 'B-LOC',\n", " 'score': 0.97810775,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9512793,\n", " 'index': 
105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9480485,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-LOC',\n", " 'score': 0.4522933,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'I-MISC',\n", " 'score': 0.43941417,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-MISC',\n", " 'score': 0.33125964,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'I-PER',\n", " 'score': 0.2833639,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9865861,\n", " 'index': 194,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'B-LOC',\n", " 'score': 0.814626,\n", " 'index': 205,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'B-LOC',\n", " 'score': 0.7951029,\n", " 'index': 215,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'B-LOC',\n", " 'score': 0.87542975,\n", " 'index': 263,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'B-ORG',\n", " 'score': 0.99368435,\n", " 'index': 282,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'B-LOC',\n", " 'score': 0.7881979,\n", " 'index': 291,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'B-PER',\n", " 'score': 0.9987973,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.99919313,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PER',\n", " 'score': 0.99887544,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'I-MISC',\n", " 'score': 
0.8786556,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9564052,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-MISC',\n", " 'score': 0.96700704,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'I-MISC',\n", " 'score': 0.89913684,\n", " 'index': 338,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'B-PER',\n", " 'score': 0.97243416,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'I-PER',\n", " 'score': 0.963992,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'I-MISC',\n", " 'score': 0.7103455,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-MISC',\n", " 'score': 0.5068105,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run the \"ner\" pipeline over `text` with Babelscape/wikineural-multilingual-ner.\n", "from transformers import pipeline\n", "\n", "model = AutoModelForTokenClassification.from_pretrained(\"Babelscape/wikineural-multilingual-ner\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"Babelscape/wikineural-multilingual-ner\")\n", "classifier = pipeline(task=\"ner\", tokenizer=tokenizer, model=model)\n", "# Bare last expression: the notebook renders the list of tagged tokens.\n", "classifier(text)" ] }, { "cell_type": "code", "execution_count": 84, "id": "60a70391-65f5-4c73-9cd3-553d635386d9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 7\n", "B-ORG 3\n", "B-PER 2\n", "I-LOC 2\n", "I-MISC 15\n", "I-PER 4\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC C 1\n", " Egypt 1\n", " Mars 5\n", "B-ORG NASA 3\n", "B-PER Mali 1\n", " Michael 1\n", "I-LOC ##onia 1\n", " ##yd 1\n", "I-MISC ##biter 1\n", " ##ian 1\n", " ##ion 1\n", " 1 1\n", " 
American 1\n", " Face 1\n", " Mars 2\n", " Mart 1\n", " On 1\n", " Or 1\n", " Ph 1\n", " Viking 2\n", " West 1\n", "I-PER ##ara 1\n", " ##n 2\n", " Mali 1\n", "dtype: int64" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the JSON file saved for this model and tally its rows per label.\n", "with open(\"12 Babelscapewikineural-multilingual-ner.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "aux = obtener_dataframe(data)\n", "print(aux.groupby(['entity']).size())\n", "# Final expression is the cell result: row count per (entity, word) pair.\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "b152a918-a4b1-4f0d-aa5a-48158eb9bee8", "metadata": {}, "source": [ "## 13 julian-schelb/roberta-ner-multilingual" ] }, { "cell_type": "code", "execution_count": 88, "id": "982634a2-a7f4-4c9d-92d4-f3b34ba89931", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.8837392,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'B-ORG',\n", " 'score': 0.38925776,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'B-LOC',\n", " 'score': 0.7155649,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'B-LOC',\n", " 'score': 0.64458394,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'B-LOC',\n", " 'score': 0.623109,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'B-ORG',\n", " 'score': 0.34994408,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'B-ORG',\n", " 'score': 0.32543704,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-PER',\n", " 'score': 0.4674562,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'I-PER',\n", " 'score': 0.53341544,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 
'end': 510},\n", " {'entity': 'I-PER',\n", " 'score': 0.50114465,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-ORG',\n", " 'score': 0.5386203,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'B-ORG',\n", " 'score': 0.6421071,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'B-PER',\n", " 'score': 0.521761,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.66133285,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'B-ORG',\n", " 'score': 0.87530375,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-ORG',\n", " 'score': 0.775388,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-ORG',\n", " 'score': 0.74558014,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'I-ORG',\n", " 'score': 0.77897793,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'B-LOC',\n", " 'score': 0.781982,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.5271412,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run the \"ner\" pipeline over `text` with julian-schelb/roberta-ner-multilingual.\n", "from transformers import pipeline\n", "\n", "model = AutoModelForTokenClassification.from_pretrained(\"julian-schelb/roberta-ner-multilingual\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"julian-schelb/roberta-ner-multilingual\")\n", "classifier = pipeline(task=\"ner\", tokenizer=tokenizer, model=model)\n", "# Bare last expression: the notebook renders the list of tagged tokens.\n", "classifier(text)" ] }, { "cell_type": "code", "execution_count": 89, "id": 
"1836b86a-e0c8-42c4-b199-b45b3e4c6235", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 4\n", "B-ORG 7\n", "B-PER 1\n", "I-LOC 1\n", "I-ORG 3\n", "I-PER 4\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC do 1\n", " nia 1\n", " ▁American 1\n", " ▁Cy 1\n", "B-ORG ion 1\n", " ▁Egypt 1\n", " ▁Mars 1\n", " ▁Marti 1\n", " ▁NASA 3\n", "B-PER ▁Michael 1\n", "I-LOC ▁West 1\n", "I-ORG bit 1\n", " er 1\n", " ▁Or 1\n", "I-PER a 1\n", " oh 1\n", " ▁Malin 1\n", " ▁Phar 1\n", "dtype: int64" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the JSON file saved for this model and tally its rows per label.\n", "with open(\"13 julian-schelbroberta-ner-multilingual.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "aux = obtener_dataframe(data)\n", "print(aux.groupby(['entity']).size())\n", "# Final expression is the cell result: row count per (entity, word) pair.\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "16569163-0440-4747-8af0-167e67d576a5", "metadata": {}, "source": [ "## 14 FacebookAI/xlm-roberta-large-finetuned-conll03-german" ] }, { "cell_type": "code", "execution_count": 91, "id": "3792dc25-ec55-4d6d-822c-e921c9129cd0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at FacebookAI/xlm-roberta-large-finetuned-conll03-english were not used when initializing XLMRobertaForTokenClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'I-ORG', 'score': 0.9999913, 'index': 8, 'word': '▁NASA', 'start': 16, 'end': 20}, {'entity': 'I-MISC', 'score': 0.9999641, 'index': 23, 'word': '▁Face', 'start': 88, 'end': 92}, {'entity': 'I-MISC', 'score': 0.99989665, 'index': 24, 'word': '▁On', 'start': 93, 'end': 95}, {'entity': 'I-MISC', 'score': 0.97350365, 'index': 25, 'word': '▁Mars', 'start': 96, 'end': 100}, {'entity': 'I-LOC', 'score': 0.9999362, 'index': 36, 'word': '▁Mars', 'start': 152, 'end': 156}, {'entity': 'I-MISC', 'score': 0.9992086, 'index': 58, 'word': '▁Viking', 'start': 240, 'end': 246}, {'entity': 'I-MISC', 'score': 0.9989502, 'index': 59, 'word': '▁1', 'start': 247, 'end': 248}, {'entity': 'I-MISC', 'score': 0.999977, 'index': 97, 'word': '▁Marti', 'start': 407, 'end': 412}, {'entity': 'I-MISC', 'score': 0.99619055, 'index': 98, 'word': 'an', 'start': 412, 'end': 414}, {'entity': 'I-LOC', 'score': 0.9999354, 'index': 103, 'word': '▁Cy', 'start': 435, 'end': 437}, {'entity': 'I-LOC', 'score': 0.99994576, 'index': 104, 'word': 'do', 'start': 437, 'end': 439}, {'entity': 'I-LOC', 'score': 0.99992585, 'index': 105, 'word': 'nia', 'start': 439, 'end': 442}, {'entity': 'I-MISC', 'score': 0.9999789, 'index': 119, 'word': '▁Egypt', 'start': 496, 'end': 501}, {'entity': 'I-MISC', 'score': 0.9614088, 'index': 120, 'word': 'ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.99997246, 'index': 193, 'word': '▁NASA', 'start': 801, 'end': 805}, {'entity': 'I-LOC', 'score': 0.99979633, 'index': 205, 'word': '▁Mars', 'start': 843, 'end': 847}, {'entity': 'I-LOC', 'score': 0.9998061, 
'index': 215, 'word': '▁Mars', 'start': 875, 'end': 879}, {'entity': 'I-LOC', 'score': 0.99984956, 'index': 264, 'word': '▁Mars', 'start': 1088, 'end': 1092}, {'entity': 'I-ORG', 'score': 0.99996305, 'index': 285, 'word': '▁NASA', 'start': 1169, 'end': 1173}, {'entity': 'I-LOC', 'score': 0.9998203, 'index': 295, 'word': '▁Mars', 'start': 1220, 'end': 1224}, {'entity': 'I-PER', 'score': 0.9999932, 'index': 319, 'word': '▁Michael', 'start': 1312, 'end': 1319}, {'entity': 'I-PER', 'score': 0.99999106, 'index': 320, 'word': '▁Malin', 'start': 1320, 'end': 1325}, {'entity': 'I-MISC', 'score': 0.94105357, 'index': 323, 'word': '▁Mars', 'start': 1334, 'end': 1338}, {'entity': 'I-MISC', 'score': 0.9839579, 'index': 324, 'word': '▁Or', 'start': 1339, 'end': 1341}, {'entity': 'I-MISC', 'score': 0.9913346, 'index': 325, 'word': 'bit', 'start': 1341, 'end': 1344}, {'entity': 'I-MISC', 'score': 0.9759228, 'index': 326, 'word': 'er', 'start': 1344, 'end': 1346}, {'entity': 'I-MISC', 'score': 0.999749, 'index': 341, 'word': '▁Viking', 'start': 1419, 'end': 1425}, {'entity': 'I-PER', 'score': 0.9999914, 'index': 416, 'word': '▁Malin', 'start': 1745, 'end': 1750}, {'entity': 'I-MISC', 'score': 0.92417294, 'index': 491, 'word': '▁American', 'start': 2094, 'end': 2102}, {'entity': 'I-LOC', 'score': 0.99954396, 'index': 492, 'word': '▁West', 'start': 2103, 'end': 2107}]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "# Tokenizer and model must come from the same checkpoint; this section evaluates the German CoNLL-03 fine-tune.\n", "tokenizer = AutoTokenizer.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-german\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"FacebookAI/xlm-roberta-large-finetuned-conll03-german\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)\n" ] }, { "cell_type": "code", "execution_count": 92, "id": "356a5837-b0f4-4c63-95f0-42a562b8553e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "I-LOC 
9\n", "I-MISC 15\n", "I-ORG 3\n", "I-PER 3\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "I-LOC do 1\n", " nia 1\n", " ▁Cy 1\n", " ▁Mars 5\n", " ▁West 1\n", "I-MISC an 1\n", " bit 1\n", " er 1\n", " ion 1\n", " ▁1 1\n", " ▁American 1\n", " ▁Egypt 1\n", " ▁Face 1\n", " ▁Mars 2\n", " ▁Marti 1\n", " ▁On 1\n", " ▁Or 1\n", " ▁Viking 2\n", "I-ORG ▁NASA 3\n", "I-PER ▁Malin 2\n", " ▁Michael 1\n", "dtype: int64" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"14 FacebookAIxlm-roberta-large-finetuned-conll03-german.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "f34e36ec-8d26-49b4-89a5-15eaa203e168", "metadata": {}, "source": [ "## 15 jplu/tf-xlm-r-ner-40-lang" ] }, { "cell_type": "code", "execution_count": 103, "id": "293a3320-8385-42a0-9aaf-fd24487ade80", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers[sentencepiece] in c:\\users\\nw\\anaconda3\\lib\\site-packages (4.41.2)\n", "Requirement already satisfied: filelock in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.23.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.23.4)\n", "Requirement already satisfied: numpy>=1.17 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in 
c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (2023.10.3)\n", "Requirement already satisfied: requests in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.19.1)\n", "Requirement already satisfied: safetensors>=0.4.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.4.3)\n", "Requirement already satisfied: tqdm>=4.27 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (4.66.4)\n", "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (0.2.0)\n", "Requirement already satisfied: protobuf in c:\\users\\nw\\anaconda3\\lib\\site-packages (from transformers[sentencepiece]) (3.20.3)\n", "Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from huggingface-hub<1.0,>=0.23.0->transformers[sentencepiece]) (2023.10.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from huggingface-hub<1.0,>=0.23.0->transformers[sentencepiece]) (4.9.0)\n", "Requirement already satisfied: colorama in c:\\users\\nw\\anaconda3\\lib\\site-packages (from tqdm>=4.27->transformers[sentencepiece]) (0.4.6)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in 
c:\\users\\nw\\anaconda3\\lib\\site-packages (from requests->transformers[sentencepiece]) (2024.2.2)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "# Use the %pip magic so the install targets this kernel's environment; pinned to the version recorded in this cell's output.\n", "%pip install transformers[sentencepiece]==4.41.2" ] }, { "cell_type": "code", "execution_count": 4, "id": "a6a0c627-2ac4-48f0-8870-fd1a2479245c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some layers from the model checkpoint at jplu/tf-xlm-r-ner-40-lang were not used when initializing TFXLMRobertaForTokenClassification: ['dropout_38']\n", "- This IS expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing TFXLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "All the layers of TFXLMRobertaForTokenClassification were initialized from the model checkpoint at jplu/tf-xlm-r-ner-40-lang.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaForTokenClassification for predictions without further training.\n", "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. 
Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'ORG',\n", " 'score': 0.94845986,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 15,\n", " 'end': 20},\n", " {'entity': 'ORG',\n", " 'score': 0.7998288,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 239,\n", " 'end': 246},\n", " {'entity': 'ORG',\n", " 'score': 0.7579509,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 246,\n", " 'end': 248},\n", " {'entity': 'ORG',\n", " 'score': 0.52262145,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 248,\n", " 'end': 254},\n", " {'entity': 'ORG',\n", " 'score': 0.5880066,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 800,\n", " 'end': 805},\n", " {'entity': 'ORG',\n", " 'score': 0.7987309,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1168,\n", " 'end': 1173},\n", " {'entity': 'PER',\n", " 'score': 0.9565463,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1311,\n", " 'end': 1319},\n", " {'entity': 'PER',\n", " 'score': 0.9528012,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1319,\n", " 'end': 1325},\n", " {'entity': 'ORG',\n", " 'score': 0.5696624,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1333,\n", " 'end': 1338},\n", " {'entity': 'LOC',\n", " 'score': 0.93097985,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2093,\n", " 'end': 2102},\n", " {'entity': 'LOC',\n", " 'score': 0.89757425,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2102,\n", " 'end': 2107}]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "\n", "nlp_ner = pipeline(\n", " \"ner\",\n", " model=\"jplu/tf-xlm-r-ner-40-lang\",\n", " tokenizer=(\n", " 'jplu/tf-xlm-r-ner-40-lang'),\n", " framework=\"tf\"\n", ")\n", "\n", "nlp_ner(text)\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "8ea0424d-e2da-4958-be66-2dc6e6556456", "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "entity\n", "LOC 2\n", "ORG 7\n", "PER 2\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "LOC ▁American 1\n", " ▁West 1\n", "ORG ▁1 1\n", " ▁Mars 1\n", " ▁NASA 3\n", " ▁Viking 1\n", " ▁space 1\n", "PER ▁Malin 1\n", " ▁Michael 1\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"15 jplutf-xlm-r-ner-40-lang.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "6fc9092a-d4c6-4365-9d73-205ea982bde6", "metadata": {}, "source": [ "## 16 sagorsarker/codeswitch-spaeng-lid-lince" ] }, { "cell_type": "code", "execution_count": 127, "id": "c71bfa05-c1e7-4bae-bcc6-78962e062f1f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at sagorsarker/codeswitch-spaeng-lid-lince were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n", "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "data": { "text/plain": [ "[{'entity': 'en',\n", " 'score': 0.9998629,\n", " 'index': 1,\n", " 'word': 'So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'other',\n", " 'score': 0.9999267,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'en',\n", " 'score': 0.99985707,\n", " 'index': 3,\n", " 'word': 'if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'en',\n", " 'score': 0.99984396,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'en',\n", " 'score': 0.9998392,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'en',\n", " 'score': 0.9998178,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'en',\n", " 'score': 0.99961334,\n", " 'index': 7,\n", " 'word': 'a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'ne',\n", " 'score': 0.99565876,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'en',\n", " 'score': 0.9997341,\n", " 'index': 9,\n", " 'word': 'scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'other',\n", " 'score': 0.99992585,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'en',\n", " 'score': 0.99986625,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'en',\n", " 'score': 0.99986553,\n", " 'index': 12,\n", " 'word': 'should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'en',\n", " 'score': 0.9998518,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 
45},\n", " {'entity': 'en',\n", " 'score': 0.9998604,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'en',\n", " 'score': 0.99985576,\n", " 'index': 15,\n", " 'word': 'to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'en',\n", " 'score': 0.999874,\n", " 'index': 16,\n", " 'word': 'tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'en',\n", " 'score': 0.9998467,\n", " 'index': 17,\n", " 'word': 'me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'en',\n", " 'score': 0.99984443,\n", " 'index': 18,\n", " 'word': 'the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'en',\n", " 'score': 0.99985886,\n", " 'index': 19,\n", " 'word': 'whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'en',\n", " 'score': 0.9998666,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'en',\n", " 'score': 0.99987817,\n", " 'index': 21,\n", " 'word': 'about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'en',\n", " 'score': 0.99987066,\n", " 'index': 22,\n", " 'word': 'the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'en',\n", " 'score': 0.9998729,\n", " 'index': 23,\n", " 'word': 'Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'en',\n", " 'score': 0.9998679,\n", " 'index': 24,\n", " 'word': 'On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'ne',\n", " 'score': 0.9943815,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'other',\n", " 'score': 0.9999286,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'en',\n", " 'score': 0.9998467,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'en',\n", " 'score': 0.9998258,\n", " 'index': 28,\n", " 'word': 'obvious',\n", " 'start': 108,\n", " 'end': 115},\n", " {'entity': 'en',\n", " 'score': 0.99984264,\n", " 'index': 29,\n", " 
'word': '##ly',\n", " 'start': 115,\n", " 'end': 117},\n", " {'entity': 'en',\n", " 'score': 0.9998216,\n", " 'index': 30,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'en',\n", " 'score': 0.9998173,\n", " 'index': 31,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'en',\n", " 'score': 0.9998375,\n", " 'index': 32,\n", " 'word': 'that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'en',\n", " 'score': 0.9998186,\n", " 'index': 33,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'en',\n", " 'score': 0.99982053,\n", " 'index': 34,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'en',\n", " 'score': 0.99982506,\n", " 'index': 35,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'en',\n", " 'score': 0.99981946,\n", " 'index': 36,\n", " 'word': 'on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'ne',\n", " 'score': 0.99421823,\n", " 'index': 37,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'other',\n", " 'score': 0.9999292,\n", " 'index': 38,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'en',\n", " 'score': 0.99983776,\n", " 'index': 39,\n", " 'word': 'and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'en',\n", " 'score': 0.9998344,\n", " 'index': 40,\n", " 'word': 'that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'en',\n", " 'score': 0.9997547,\n", " 'index': 41,\n", " 'word': 'the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'en',\n", " 'score': 0.99978274,\n", " 'index': 42,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'en',\n", " 'score': 0.9997836,\n", " 'index': 43,\n", " 'word': 'was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'en',\n", " 'score': 0.99971956,\n", " 'index': 44,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", 
" {'entity': 'en',\n", " 'score': 0.99975187,\n", " 'index': 45,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'en',\n", " 'score': 0.9997137,\n", " 'index': 46,\n", " 'word': 'alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': 'en',\n", " 'score': 0.99977976,\n", " 'index': 47,\n", " 'word': '##s',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'other',\n", " 'score': 0.9999249,\n", " 'index': 48,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'en',\n", " 'score': 0.99981195,\n", " 'index': 49,\n", " 'word': 'correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'other',\n", " 'score': 0.9999312,\n", " 'index': 50,\n", " 'word': '?',\n", " 'start': 206,\n", " 'end': 207},\n", " {'entity': 'other',\n", " 'score': 0.9999089,\n", " 'index': 51,\n", " 'word': '\"',\n", " 'start': 207,\n", " 'end': 208},\n", " {'entity': 'en',\n", " 'score': 0.9864689,\n", " 'index': 52,\n", " 'word': 'No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'other',\n", " 'score': 0.99993014,\n", " 'index': 53,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'en',\n", " 'score': 0.9997601,\n", " 'index': 54,\n", " 'word': 'twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'en',\n", " 'score': 0.9996724,\n", " 'index': 55,\n", " 'word': 'five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'en',\n", " 'score': 0.99973506,\n", " 'index': 56,\n", " 'word': 'years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 'en',\n", " 'score': 0.9997675,\n", " 'index': 57,\n", " 'word': 'ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'other',\n", " 'score': 0.9999273,\n", " 'index': 58,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'en',\n", " 'score': 0.9992092,\n", " 'index': 59,\n", " 'word': 'our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'ne',\n", " 'score': 0.62706536,\n", " 
'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'other',\n", " 'score': 0.9996927,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'en',\n", " 'score': 0.9995969,\n", " 'index': 62,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'en',\n", " 'score': 0.9997328,\n", " 'index': 63,\n", " 'word': 'was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'en',\n", " 'score': 0.9997286,\n", " 'index': 64,\n", " 'word': 'ci',\n", " 'start': 264,\n", " 'end': 266},\n", " {'entity': 'en',\n", " 'score': 0.9997693,\n", " 'index': 65,\n", " 'word': '##rc',\n", " 'start': 266,\n", " 'end': 268},\n", " {'entity': 'en',\n", " 'score': 0.99981683,\n", " 'index': 66,\n", " 'word': '##ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'en',\n", " 'score': 0.99974436,\n", " 'index': 67,\n", " 'word': 'the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'en',\n", " 'score': 0.999772,\n", " 'index': 68,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'other',\n", " 'score': 0.9999294,\n", " 'index': 69,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'en',\n", " 'score': 0.9995789,\n", " 'index': 70,\n", " 'word': 'sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'en',\n", " 'score': 0.9996741,\n", " 'index': 71,\n", " 'word': '##pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'en',\n", " 'score': 0.99957246,\n", " 'index': 72,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'other',\n", " 'score': 0.99993145,\n", " 'index': 73,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'en',\n", " 'score': 0.9997521,\n", " 'index': 74,\n", " 'word': 'when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'en',\n", " 'score': 0.99976677,\n", " 'index': 75,\n", " 'word': 'it',\n", " 'start': 
307,\n", " 'end': 309},\n", " {'entity': 'en',\n", " 'score': 0.99971706,\n", " 'index': 76,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'en',\n", " 'score': 0.99961495,\n", " 'index': 77,\n", " 'word': 'the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'en',\n", " 'score': 0.9996594,\n", " 'index': 78,\n", " 'word': 'sh',\n", " 'start': 322,\n", " 'end': 324},\n", " {'entity': 'en',\n", " 'score': 0.99976844,\n", " 'index': 79,\n", " 'word': '##adow',\n", " 'start': 324,\n", " 'end': 328},\n", " {'entity': 'en',\n", " 'score': 0.9997464,\n", " 'index': 80,\n", " 'word': '##y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'en',\n", " 'score': 0.9997322,\n", " 'index': 81,\n", " 'word': 'like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'en',\n", " 'score': 0.99969065,\n", " 'index': 82,\n", " 'word': '##ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'en',\n", " 'score': 0.9996531,\n", " 'index': 83,\n", " 'word': 'of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'en',\n", " 'score': 0.9992725,\n", " 'index': 84,\n", " 'word': 'a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'en',\n", " 'score': 0.99959975,\n", " 'index': 85,\n", " 'word': 'human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'en',\n", " 'score': 0.99973136,\n", " 'index': 86,\n", " 'word': 'face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'other',\n", " 'score': 0.9999311,\n", " 'index': 87,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'en',\n", " 'score': 0.9996131,\n", " 'index': 88,\n", " 'word': 'Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'en',\n", " 'score': 0.9996656,\n", " 'index': 89,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'en',\n", " 'score': 0.99980336,\n", " 'index': 90,\n", " 'word': 'figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'en',\n", " 
'score': 0.999795,\n", " 'index': 91,\n", " 'word': '##d',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'en',\n", " 'score': 0.99979943,\n", " 'index': 92,\n", " 'word': 'out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'en',\n", " 'score': 0.9997974,\n", " 'index': 93,\n", " 'word': 'that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'en',\n", " 'score': 0.99973387,\n", " 'index': 94,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'en',\n", " 'score': 0.9997162,\n", " 'index': 95,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'en',\n", " 'score': 0.9996761,\n", " 'index': 96,\n", " 'word': 'just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'en',\n", " 'score': 0.9995012,\n", " 'index': 97,\n", " 'word': 'another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'en',\n", " 'score': 0.92382264,\n", " 'index': 98,\n", " 'word': 'Mart',\n", " 'start': 407,\n", " 'end': 411},\n", " {'entity': 'en',\n", " 'score': 0.9973562,\n", " 'index': 99,\n", " 'word': '##ian',\n", " 'start': 411,\n", " 'end': 414},\n", " {'entity': 'en',\n", " 'score': 0.7907492,\n", " 'index': 100,\n", " 'word': 'mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'other',\n", " 'score': 0.9999194,\n", " 'index': 101,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'en',\n", " 'score': 0.9995608,\n", " 'index': 102,\n", " 'word': 'common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'en',\n", " 'score': 0.9996966,\n", " 'index': 103,\n", " 'word': 'around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'ne',\n", " 'score': 0.9975068,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'ne',\n", " 'score': 0.9957877,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'ne',\n", " 'score': 0.9961337,\n", " 'index': 106,\n", " 'word': 
'##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'other',\n", " 'score': 0.9999093,\n", " 'index': 107,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'en',\n", " 'score': 0.99976605,\n", " 'index': 108,\n", " 'word': 'only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'en',\n", " 'score': 0.9997769,\n", " 'index': 109,\n", " 'word': 'this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'en',\n", " 'score': 0.99976176,\n", " 'index': 110,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'en',\n", " 'score': 0.9997123,\n", " 'index': 111,\n", " 'word': 'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'en',\n", " 'score': 0.9997372,\n", " 'index': 112,\n", " 'word': 'sh',\n", " 'start': 462,\n", " 'end': 464},\n", " {'entity': 'en',\n", " 'score': 0.9998115,\n", " 'index': 113,\n", " 'word': '##adow',\n", " 'start': 464,\n", " 'end': 468},\n", " {'entity': 'en',\n", " 'score': 0.99980253,\n", " 'index': 114,\n", " 'word': '##s',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'en',\n", " 'score': 0.99979705,\n", " 'index': 115,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'en',\n", " 'score': 0.99979264,\n", " 'index': 116,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'en',\n", " 'score': 0.9998178,\n", " 'index': 117,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'en',\n", " 'score': 0.99979025,\n", " 'index': 118,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'en',\n", " 'score': 0.99977773,\n", " 'index': 119,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'en',\n", " 'score': 0.99962866,\n", " 'index': 120,\n", " 'word': 'an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'ne',\n", " 'score': 0.68299395,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 
501},\n", " {'entity': 'en',\n", " 'score': 0.9760886,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'en',\n", " 'score': 0.9997099,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'en',\n", " 'score': 0.9998056,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'en',\n", " 'score': 0.99973387,\n", " 'index': 125,\n", " 'word': '##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'other',\n", " 'score': 0.9999268,\n", " 'index': 126,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'en',\n", " 'score': 0.99796546,\n", " 'index': 127,\n", " 'word': 'Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'en',\n", " 'score': 0.9990651,\n", " 'index': 128,\n", " 'word': 'few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'en',\n", " 'score': 0.9994499,\n", " 'index': 129,\n", " 'word': 'days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'en',\n", " 'score': 0.9995864,\n", " 'index': 130,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'other',\n", " 'score': 0.99990845,\n", " 'index': 131,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'en',\n", " 'score': 0.99973947,\n", " 'index': 132,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'en',\n", " 'score': 0.99979526,\n", " 'index': 133,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'en',\n", " 'score': 0.9997683,\n", " 'index': 134,\n", " 'word': 'the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'en',\n", " 'score': 0.999793,\n", " 'index': 135,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'en',\n", " 'score': 0.99985754,\n", " 'index': 136,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'en',\n", " 'score': 
0.999843,\n", " 'index': 137,\n", " 'word': 'all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'en',\n", " 'score': 0.99983895,\n", " 'index': 138,\n", " 'word': 'to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'en',\n", " 'score': 0.9998221,\n", " 'index': 139,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'other',\n", " 'score': 0.99992704,\n", " 'index': 140,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'en',\n", " 'score': 0.9997582,\n", " 'index': 141,\n", " 'word': 'and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'en',\n", " 'score': 0.99974746,\n", " 'index': 142,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'en',\n", " 'score': 0.9997589,\n", " 'index': 143,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'en',\n", " 'score': 0.9997596,\n", " 'index': 144,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'en',\n", " 'score': 0.9997719,\n", " 'index': 145,\n", " 'word': 'to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'en',\n", " 'score': 0.99976,\n", " 'index': 146,\n", " 'word': 'note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'en',\n", " 'score': 0.9997385,\n", " 'index': 147,\n", " 'word': 'that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'en',\n", " 'score': 0.999699,\n", " 'index': 148,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'en',\n", " 'score': 0.9996177,\n", " 'index': 149,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'en',\n", " 'score': 0.99906355,\n", " 'index': 150,\n", " 'word': 'a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'en',\n", " 'score': 0.9993754,\n", " 'index': 151,\n", " 'word': 'huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'en',\n", " 'score': 0.99958175,\n", " 'index': 152,\n", " 'word': 'rock',\n", " 
'start': 617,\n", " 'end': 621},\n", " {'entity': 'en',\n", " 'score': 0.9996152,\n", " 'index': 153,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'en',\n", " 'score': 0.9995758,\n", " 'index': 154,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'en',\n", " 'score': 0.9995844,\n", " 'index': 155,\n", " 'word': 'just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'en',\n", " 'score': 0.99950886,\n", " 'index': 156,\n", " 'word': 'res',\n", " 'start': 642,\n", " 'end': 645},\n", " {'entity': 'en',\n", " 'score': 0.9995715,\n", " 'index': 157,\n", " 'word': '##emble',\n", " 'start': 645,\n", " 'end': 650},\n", " {'entity': 'en',\n", " 'score': 0.99958056,\n", " 'index': 158,\n", " 'word': '##d',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'en',\n", " 'score': 0.99861777,\n", " 'index': 159,\n", " 'word': 'a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'en',\n", " 'score': 0.99934405,\n", " 'index': 160,\n", " 'word': 'human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'en',\n", " 'score': 0.9995927,\n", " 'index': 161,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'en',\n", " 'score': 0.9994898,\n", " 'index': 162,\n", " 'word': 'and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'en',\n", " 'score': 0.9995479,\n", " 'index': 163,\n", " 'word': 'face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'other',\n", " 'score': 0.9999279,\n", " 'index': 164,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'en',\n", " 'score': 0.9997607,\n", " 'index': 165,\n", " 'word': 'but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'en',\n", " 'score': 0.99969375,\n", " 'index': 166,\n", " 'word': 'all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'en',\n", " 'score': 0.99976605,\n", " 'index': 167,\n", " 'word': 'of',\n", " 'start': 683,\n", " 'end': 685},\n", " 
{'entity': 'en',\n", " 'score': 0.99975795,\n", " 'index': 168,\n", " 'word': 'it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'en',\n", " 'score': 0.99969816,\n", " 'index': 169,\n", " 'word': 'was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'en',\n", " 'score': 0.9996636,\n", " 'index': 170,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'en',\n", " 'score': 0.9995894,\n", " 'index': 171,\n", " 'word': 'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'en',\n", " 'score': 0.9996039,\n", " 'index': 172,\n", " 'word': 'sh',\n", " 'start': 703,\n", " 'end': 705},\n", " {'entity': 'en',\n", " 'score': 0.9997403,\n", " 'index': 173,\n", " 'word': '##adow',\n", " 'start': 705,\n", " 'end': 709},\n", " {'entity': 'en',\n", " 'score': 0.9997271,\n", " 'index': 174,\n", " 'word': '##s',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'other',\n", " 'score': 0.9999298,\n", " 'index': 175,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'en',\n", " 'score': 0.9996859,\n", " 'index': 176,\n", " 'word': 'We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'en',\n", " 'score': 0.99971765,\n", " 'index': 177,\n", " 'word': 'only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'en',\n", " 'score': 0.99975497,\n", " 'index': 178,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'en',\n", " 'score': 0.9997633,\n", " 'index': 179,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'en',\n", " 'score': 0.9997826,\n", " 'index': 180,\n", " 'word': 'because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'en',\n", " 'score': 0.99974495,\n", " 'index': 181,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'en',\n", " 'score': 0.9997533,\n", " 'index': 182,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'en',\n", " 'score': 
0.9997551,\n", " 'index': 183,\n", " 'word': 'it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'en',\n", " 'score': 0.99974245,\n", " 'index': 184,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'en',\n", " 'score': 0.9996723,\n", " 'index': 185,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'en',\n", " 'score': 0.9992391,\n", " 'index': 186,\n", " 'word': 'a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'en',\n", " 'score': 0.9996873,\n", " 'index': 187,\n", " 'word': 'good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'en',\n", " 'score': 0.9997737,\n", " 'index': 188,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'en',\n", " 'score': 0.99975866,\n", " 'index': 189,\n", " 'word': 'to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'en',\n", " 'score': 0.99975985,\n", " 'index': 190,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'en',\n", " 'score': 0.9996723,\n", " 'index': 191,\n", " 'word': 'the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'en',\n", " 'score': 0.9997609,\n", " 'index': 192,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'en',\n", " 'score': 0.9997042,\n", " 'index': 193,\n", " 'word': 'with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'ne',\n", " 'score': 0.99519366,\n", " 'index': 194,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'en',\n", " 'score': 0.99966383,\n", " 'index': 195,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'en',\n", " 'score': 0.99948585,\n", " 'index': 196,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'en',\n", " 'score': 0.9994748,\n", " 'index': 197,\n", " 'word': 'findings',\n", " 'start': 808,\n", " 'end': 816},\n", " {'entity': 'other',\n", " 'score': 0.99991834,\n", " 'index': 198,\n", " 'word': 
',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'en',\n", " 'score': 0.99959546,\n", " 'index': 199,\n", " 'word': 'and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'en',\n", " 'score': 0.9996618,\n", " 'index': 200,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'en',\n", " 'score': 0.99977297,\n", " 'index': 201,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'en',\n", " 'score': 0.9997421,\n", " 'index': 202,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'en',\n", " 'score': 0.9997882,\n", " 'index': 203,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'en',\n", " 'score': 0.99970275,\n", " 'index': 204,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'ne',\n", " 'score': 0.9933374,\n", " 'index': 205,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'other',\n", " 'score': 0.99984145,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'other',\n", " 'score': 0.9995536,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'en',\n", " 'score': 0.999739,\n", " 'index': 208,\n", " 'word': 'and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'en',\n", " 'score': 0.9997385,\n", " 'index': 209,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'en',\n", " 'score': 0.9997167,\n", " 'index': 210,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'other',\n", " 'score': 0.99991965,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'en',\n", " 'score': 0.9994844,\n", " 'index': 212,\n", " 'word': 'The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'en',\n", " 'score': 0.9995809,\n", " 'index': 213,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " 
{'entity': 'en',\n", " 'score': 0.9993393,\n", " 'index': 214,\n", " 'word': 'on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'ne',\n", " 'score': 0.9903474,\n", " 'index': 215,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'en',\n", " 'score': 0.99960655,\n", " 'index': 216,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'en',\n", " 'score': 0.9992306,\n", " 'index': 217,\n", " 'word': 'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'en',\n", " 'score': 0.9976654,\n", " 'index': 218,\n", " 'word': 'a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'en',\n", " 'score': 0.99933064,\n", " 'index': 219,\n", " 'word': 'pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'en',\n", " 'score': 0.99950266,\n", " 'index': 220,\n", " 'word': 'i',\n", " 'start': 898,\n", " 'end': 899},\n", " {'entity': 'en',\n", " 'score': 0.99954295,\n", " 'index': 221,\n", " 'word': '##con',\n", " 'start': 899,\n", " 'end': 902},\n", " {'entity': 'other',\n", " 'score': 0.99992657,\n", " 'index': 222,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'en',\n", " 'score': 0.9994814,\n", " 'index': 223,\n", " 'word': 'shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'en',\n", " 'score': 0.9994511,\n", " 'index': 224,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'en',\n", " 'score': 0.9992155,\n", " 'index': 225,\n", " 'word': 'movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'other',\n", " 'score': 0.99992335,\n", " 'index': 226,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'en',\n", " 'score': 0.9993299,\n", " 'index': 227,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'en',\n", " 'score': 0.9994456,\n", " 'index': 228,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'en',\n", " 'score': 0.9991399,\n", 
" 'index': 229,\n", " 'word': 'books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'other',\n", " 'score': 0.9999262,\n", " 'index': 230,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'en',\n", " 'score': 0.99925786,\n", " 'index': 231,\n", " 'word': 'magazines',\n", " 'start': 939,\n", " 'end': 948},\n", " {'entity': 'other',\n", " 'score': 0.99992716,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'en',\n", " 'score': 0.99942744,\n", " 'index': 233,\n", " 'word': 'radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'en',\n", " 'score': 0.9996457,\n", " 'index': 234,\n", " 'word': 'talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'en',\n", " 'score': 0.9996043,\n", " 'index': 235,\n", " 'word': 'shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'other',\n", " 'score': 0.9999255,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'en',\n", " 'score': 0.99946564,\n", " 'index': 237,\n", " 'word': 'and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'en',\n", " 'score': 0.9997336,\n", " 'index': 238,\n", " 'word': 'hau',\n", " 'start': 972,\n", " 'end': 975},\n", " {'entity': 'en',\n", " 'score': 0.99972874,\n", " 'index': 239,\n", " 'word': '##nted',\n", " 'start': 975,\n", " 'end': 979},\n", " {'entity': 'en',\n", " 'score': 0.9996462,\n", " 'index': 240,\n", " 'word': 'gr',\n", " 'start': 980,\n", " 'end': 982},\n", " {'entity': 'en',\n", " 'score': 0.9997458,\n", " 'index': 241,\n", " 'word': '##oce',\n", " 'start': 982,\n", " 'end': 985},\n", " {'entity': 'en',\n", " 'score': 0.9996829,\n", " 'index': 242,\n", " 'word': '##ry',\n", " 'start': 985,\n", " 'end': 987},\n", " {'entity': 'en',\n", " 'score': 0.99970573,\n", " 'index': 243,\n", " 'word': 'store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'en',\n", " 'score': 0.9997999,\n", " 'index': 244,\n", " 'word': 
'check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'en',\n", " 'score': 0.9997851,\n", " 'index': 245,\n", " 'word': '##out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'en',\n", " 'score': 0.9997584,\n", " 'index': 246,\n", " 'word': 'lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'en',\n", " 'score': 0.99963033,\n", " 'index': 247,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'other',\n", " 'score': 0.90661347,\n", " 'index': 248,\n", " 'word': '25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'en',\n", " 'score': 0.99967,\n", " 'index': 249,\n", " 'word': 'years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'other',\n", " 'score': 0.99992573,\n", " 'index': 250,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'en',\n", " 'score': 0.9997687,\n", " 'index': 251,\n", " 'word': 'Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'en',\n", " 'score': 0.9997943,\n", " 'index': 252,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'en',\n", " 'score': 0.9997925,\n", " 'index': 253,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'en',\n", " 'score': 0.9996296,\n", " 'index': 254,\n", " 'word': 'the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'en',\n", " 'score': 0.99950707,\n", " 'index': 255,\n", " 'word': 'natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'en',\n", " 'score': 0.9997507,\n", " 'index': 256,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'en',\n", " 'score': 0.99979764,\n", " 'index': 257,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'en',\n", " 'score': 0.99975556,\n", " 'index': 258,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'en',\n", " 'score': 0.99978846,\n", " 'index': 259,\n", " 'word': 
'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'en',\n", " 'score': 0.9998209,\n", " 'index': 260,\n", " 'word': 'of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'en',\n", " 'score': 0.99982053,\n", " 'index': 261,\n", " 'word': 'life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'en',\n", " 'score': 0.999806,\n", " 'index': 262,\n", " 'word': 'on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'ne',\n", " 'score': 0.9924434,\n", " 'index': 263,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'other',\n", " 'score': 0.9999304,\n", " 'index': 264,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'en',\n", " 'score': 0.9998184,\n", " 'index': 265,\n", " 'word': 'and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'en',\n", " 'score': 0.9998319,\n", " 'index': 266,\n", " 'word': 'that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'en',\n", " 'score': 0.9995714,\n", " 'index': 267,\n", " 'word': 'us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'en',\n", " 'score': 0.9997626,\n", " 'index': 268,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 1116},\n", " {'entity': 'en',\n", " 'score': 0.9998241,\n", " 'index': 269,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'en',\n", " 'score': 0.9998323,\n", " 'index': 270,\n", " 'word': 'to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'en',\n", " 'score': 0.99986374,\n", " 'index': 271,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'en',\n", " 'score': 0.99984705,\n", " 'index': 272,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'other',\n", " 'score': 0.99992883,\n", " 'index': 273,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'en',\n", " 'score': 0.99986625,\n", " 'index': 274,\n", " 'word': 'but',\n", " 
'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'en',\n", " 'score': 0.99982065,\n", " 'index': 275,\n", " 'word': 'really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'other',\n", " 'score': 0.9999231,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'en',\n", " 'score': 0.99983907,\n", " 'index': 277,\n", " 'word': 'the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'en',\n", " 'score': 0.9998418,\n", " 'index': 278,\n", " 'word': 'defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'en',\n", " 'score': 0.9998036,\n", " 'index': 279,\n", " 'word': '##s',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'en',\n", " 'score': 0.9998399,\n", " 'index': 280,\n", " 'word': 'of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'en',\n", " 'score': 0.9997856,\n", " 'index': 281,\n", " 'word': 'the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'ne',\n", " 'score': 0.9955486,\n", " 'index': 282,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'en',\n", " 'score': 0.9998142,\n", " 'index': 283,\n", " 'word': 'budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'en',\n", " 'score': 0.9998621,\n", " 'index': 284,\n", " 'word': 'wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'en',\n", " 'score': 0.99985635,\n", " 'index': 285,\n", " 'word': 'there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'en',\n", " 'score': 0.9998217,\n", " 'index': 286,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'en',\n", " 'score': 0.9998242,\n", " 'index': 287,\n", " 'word': 'ancient',\n", " 'start': 1196,\n", " 'end': 1203},\n", " {'entity': 'en',\n", " 'score': 0.9998467,\n", " 'index': 288,\n", " 'word': 'civili',\n", " 'start': 1204,\n", " 'end': 1210},\n", " {'entity': 'en',\n", " 'score': 0.9998492,\n", " 'index': 289,\n", " 'word': '##zation',\n", " 
'start': 1210,\n", " 'end': 1216},\n", " {'entity': 'en',\n", " 'score': 0.99985075,\n", " 'index': 290,\n", " 'word': 'on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'ne',\n", " 'score': 0.9934476,\n", " 'index': 291,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'other',\n", " 'score': 0.9999279,\n", " 'index': 292,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'en',\n", " 'score': 0.9996929,\n", " 'index': 293,\n", " 'word': 'We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'en',\n", " 'score': 0.9997583,\n", " 'index': 294,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'en',\n", " 'score': 0.99972254,\n", " 'index': 295,\n", " 'word': 'to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'en',\n", " 'score': 0.99963474,\n", " 'index': 296,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'en',\n", " 'score': 0.99941206,\n", " 'index': 297,\n", " 'word': 'another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'en',\n", " 'score': 0.99968994,\n", " 'index': 298,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'en',\n", " 'score': 0.9997118,\n", " 'index': 299,\n", " 'word': 'just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'en',\n", " 'score': 0.9997631,\n", " 'index': 300,\n", " 'word': 'to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'en',\n", " 'score': 0.99978083,\n", " 'index': 301,\n", " 'word': 'make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'en',\n", " 'score': 0.9998004,\n", " 'index': 302,\n", " 'word': 'sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'en',\n", " 'score': 0.99981683,\n", " 'index': 303,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'en',\n", " 'score': 0.99981815,\n", " 'index': 304,\n", " 'word': 'were',\n", " 'start': 
1279,\n", " 'end': 1283},\n", " {'entity': 'en',\n", " 'score': 0.99982005,\n", " 'index': 305,\n", " 'word': '##n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'en',\n", " 'score': 0.9997904,\n", " 'index': 306,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'en',\n", " 'score': 0.999744,\n", " 'index': 307,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'en',\n", " 'score': 0.9998004,\n", " 'index': 308,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'other',\n", " 'score': 0.99991894,\n", " 'index': 309,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'en',\n", " 'score': 0.9986118,\n", " 'index': 310,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'other',\n", " 'score': 0.53777504,\n", " 'index': 311,\n", " 'word': 'April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'other',\n", " 'score': 0.9973557,\n", " 'index': 312,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'other',\n", " 'score': 0.9998646,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'other',\n", " 'score': 0.99943346,\n", " 'index': 314,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 'other',\n", " 'score': 0.9999175,\n", " 'index': 315,\n", " 'word': '.',\n", " 'start': 1310,\n", " 'end': 1311},\n", " {'entity': 'ne',\n", " 'score': 0.99906963,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'ne',\n", " 'score': 0.9983594,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'ne',\n", " 'score': 0.9985464,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'en',\n", " 'score': 0.99965477,\n", " 'index': 319,\n", " 'word': 'and',\n", " 'start': 1326,\n", " 
'end': 1329},\n", " {'entity': 'en',\n", " 'score': 0.9990552,\n", " 'index': 320,\n", " 'word': 'his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'ne',\n", " 'score': 0.97999007,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'ne',\n", " 'score': 0.6277367,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'ne',\n", " 'score': 0.5293862,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'en',\n", " 'score': 0.9995109,\n", " 'index': 324,\n", " 'word': 'camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'en',\n", " 'score': 0.9995858,\n", " 'index': 325,\n", " 'word': 'team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'en',\n", " 'score': 0.99904686,\n", " 'index': 326,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'en',\n", " 'score': 0.99744225,\n", " 'index': 327,\n", " 'word': 'a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'en',\n", " 'score': 0.99924374,\n", " 'index': 328,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'en',\n", " 'score': 0.99940455,\n", " 'index': 329,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'en',\n", " 'score': 0.9994288,\n", " 'index': 330,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'en',\n", " 'score': 0.99890625,\n", " 'index': 331,\n", " 'word': 'ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'en',\n", " 'score': 0.9993057,\n", " 'index': 332,\n", " 'word': 'times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'en',\n", " 'score': 0.99956423,\n", " 'index': 333,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'en',\n", " 'score': 0.99929535,\n", " 'index': 334,\n", " 'word': '##er',\n", " 'start': 1398,\n", " 'end': 
1400},\n", " {'entity': 'en',\n", " 'score': 0.9995135,\n", " 'index': 335,\n", " 'word': 'than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'en',\n", " 'score': 0.99948967,\n", " 'index': 336,\n", " 'word': 'the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'en',\n", " 'score': 0.99922657,\n", " 'index': 337,\n", " 'word': 'original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'en',\n", " 'score': 0.54231477,\n", " 'index': 338,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'en',\n", " 'score': 0.9983171,\n", " 'index': 339,\n", " 'word': 'photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'other',\n", " 'score': 0.9999213,\n", " 'index': 340,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'en',\n", " 'score': 0.9995522,\n", " 'index': 341,\n", " 'word': 'reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'en',\n", " 'score': 0.9996402,\n", " 'index': 342,\n", " 'word': '##ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'en',\n", " 'score': 0.9978428,\n", " 'index': 343,\n", " 'word': 'a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'en',\n", " 'score': 0.99874496,\n", " 'index': 344,\n", " 'word': 'natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'en',\n", " 'score': 0.99962974,\n", " 'index': 345,\n", " 'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'en',\n", " 'score': 0.99966383,\n", " 'index': 346,\n", " 'word': '##form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'other',\n", " 'score': 0.99993026,\n", " 'index': 347,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'en',\n", " 'score': 0.99916697,\n", " 'index': 348,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'en',\n", " 'score': 0.9983835,\n", " 'index': 349,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 
'end': 1475},\n", " {'entity': 'en',\n", " 'score': 0.9928202,\n", " 'index': 350,\n", " 'word': 'no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'en',\n", " 'score': 0.9962263,\n", " 'index': 351,\n", " 'word': 'alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'en',\n", " 'score': 0.9987729,\n", " 'index': 352,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'other',\n", " 'score': 0.9999304,\n", " 'index': 353,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'other',\n", " 'score': 0.99991786,\n", " 'index': 354,\n", " 'word': '\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'en',\n", " 'score': 0.99973565,\n", " 'index': 355,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'en',\n", " 'score': 0.9994904,\n", " 'index': 356,\n", " 'word': 'that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'en',\n", " 'score': 0.9995603,\n", " 'index': 357,\n", " 'word': 'picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'en',\n", " 'score': 0.99966073,\n", " 'index': 358,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'en',\n", " 'score': 0.99970895,\n", " 'index': 359,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'en',\n", " 'score': 0.99959975,\n", " 'index': 360,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'en',\n", " 'score': 0.9995778,\n", " 'index': 361,\n", " 'word': 'very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'en',\n", " 'score': 0.99971443,\n", " 'index': 362,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'en',\n", " 'score': 0.9997217,\n", " 'index': 363,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'en',\n", " 'score': 0.99960893,\n", " 'index': 364,\n", " 'word': 'all',\n", " 'start': 1534,\n", " 'end': 
1537},\n", " {'entity': 'other',\n", " 'score': 0.99993,\n", " 'index': 365,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'en',\n", " 'score': 0.99955565,\n", " 'index': 366,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'en',\n", " 'score': 0.99938047,\n", " 'index': 367,\n", " 'word': 'could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'en',\n", " 'score': 0.99928325,\n", " 'index': 368,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'en',\n", " 'score': 0.9982596,\n", " 'index': 369,\n", " 'word': 'alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'en',\n", " 'score': 0.99910384,\n", " 'index': 370,\n", " 'word': 'marking',\n", " 'start': 1562,\n", " 'end': 1569},\n", " {'entity': 'en',\n", " 'score': 0.99916995,\n", " 'index': 371,\n", " 'word': '##s',\n", " 'start': 1569,\n", " 'end': 1570},\n", " {'entity': 'en',\n", " 'score': 0.99937785,\n", " 'index': 372,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'en',\n", " 'score': 0.99948174,\n", " 'index': 373,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'en',\n", " 'score': 0.99941933,\n", " 'index': 374,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'en',\n", " 'score': 0.99944514,\n", " 'index': 375,\n", " 'word': 'ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': 'en',\n", " 'score': 0.9994628,\n", " 'index': 376,\n", " 'word': '##ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'other',\n", " 'score': 0.9999194,\n", " 'index': 377,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'en',\n", " 'score': 0.9996394,\n", " 'index': 378,\n", " 'word': 'Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'en',\n", " 'score': 0.99759346,\n", " 'index': 379,\n", " 'word': 'no',\n", " 'start': 1597,\n", " 'end': 
1599},\n", " {'entity': 'other',\n", " 'score': 0.99993074,\n", " 'index': 380,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'en',\n", " 'score': 0.9995228,\n", " 'index': 381,\n", " 'word': 'ye',\n", " 'start': 1601,\n", " 'end': 1603},\n", " {'entity': 'en',\n", " 'score': 0.99969923,\n", " 'index': 382,\n", " 'word': '##s',\n", " 'start': 1603,\n", " 'end': 1604},\n", " {'entity': 'en',\n", " 'score': 0.99970835,\n", " 'index': 383,\n", " 'word': 'that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': 'en',\n", " 'score': 0.999665,\n", " 'index': 384,\n", " 'word': 'rum',\n", " 'start': 1610,\n", " 'end': 1613},\n", " {'entity': 'en',\n", " 'score': 0.99970514,\n", " 'index': 385,\n", " 'word': '##or',\n", " 'start': 1613,\n", " 'end': 1615},\n", " {'entity': 'en',\n", " 'score': 0.99943703,\n", " 'index': 386,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'other',\n", " 'score': 0.9999294,\n", " 'index': 387,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'en',\n", " 'score': 0.99961936,\n", " 'index': 388,\n", " 'word': 'but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'en',\n", " 'score': 0.99959284,\n", " 'index': 389,\n", " 'word': 'to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'en',\n", " 'score': 0.99967396,\n", " 'index': 390,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'en',\n", " 'score': 0.9996499,\n", " 'index': 391,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'en',\n", " 'score': 0.9996276,\n", " 'index': 392,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'en',\n", " 'score': 0.99944097,\n", " 'index': 393,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'en',\n", " 'score': 0.61485404,\n", " 'index': 394,\n", " 'word': 'April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " 
{'entity': 'other',\n", " 'score': 0.99845207,\n", " 'index': 395,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'other',\n", " 'score': 0.9998248,\n", " 'index': 396,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'other',\n", " 'score': 0.967133,\n", " 'index': 397,\n", " 'word': '2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'en',\n", " 'score': 0.99961036,\n", " 'index': 398,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'en',\n", " 'score': 0.9996408,\n", " 'index': 399,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'en',\n", " 'score': 0.99961096,\n", " 'index': 400,\n", " 'word': 'to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'en',\n", " 'score': 0.9994717,\n", " 'index': 401,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'en',\n", " 'score': 0.99907696,\n", " 'index': 402,\n", " 'word': 'another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'en',\n", " 'score': 0.9993956,\n", " 'index': 403,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'other',\n", " 'score': 0.9999089,\n", " 'index': 404,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'en',\n", " 'score': 0.9996431,\n", " 'index': 405,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'en',\n", " 'score': 0.9997148,\n", " 'index': 406,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'en',\n", " 'score': 0.9997253,\n", " 'index': 407,\n", " 'word': 'it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'en',\n", " 'score': 0.99960643,\n", " 'index': 408,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'en',\n", " 'score': 0.9989556,\n", " 'index': 409,\n", " 'word': 'a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 
'en',\n", " 'score': 0.99975353,\n", " 'index': 410,\n", " 'word': 'cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'en',\n", " 'score': 0.999777,\n", " 'index': 411,\n", " 'word': '##less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'en',\n", " 'score': 0.9997631,\n", " 'index': 412,\n", " 'word': 'summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'en',\n", " 'score': 0.99969566,\n", " 'index': 413,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'other',\n", " 'score': 0.9999254,\n", " 'index': 414,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'ne',\n", " 'score': 0.9956275,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'ne',\n", " 'score': 0.9934854,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'en',\n", " 'score': 0.9940276,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'en',\n", " 'score': 0.9994653,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'en',\n", " 'score': 0.99972516,\n", " 'index': 419,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'en',\n", " 'score': 0.9996685,\n", " 'index': 420,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'en',\n", " 'score': 0.99941444,\n", " 'index': 421,\n", " 'word': 'an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'en',\n", " 'score': 0.99968505,\n", " 'index': 422,\n", " 'word': 'ama',\n", " 'start': 1770,\n", " 'end': 1773},\n", " {'entity': 'en',\n", " 'score': 0.99972624,\n", " 'index': 423,\n", " 'word': '##zing',\n", " 'start': 1773,\n", " 'end': 1777},\n", " {'entity': 'en',\n", " 'score': 0.9996338,\n", " 'index': 424,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'en',\n", " 
'score': 0.9996408,\n", " 'index': 425,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'en',\n", " 'score': 0.9994491,\n", " 'index': 426,\n", " 'word': 'the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'en',\n", " 'score': 0.99961936,\n", " 'index': 427,\n", " 'word': 'camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'en',\n", " 'score': 0.9996668,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'en',\n", " 'score': 0.9995703,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'en',\n", " 'score': 0.99967444,\n", " 'index': 430,\n", " 'word': 'absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'en',\n", " 'score': 0.99970645,\n", " 'index': 431,\n", " 'word': 'maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'en',\n", " 'score': 0.9996973,\n", " 'index': 432,\n", " 'word': 'revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'other',\n", " 'score': 0.99992955,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'en',\n", " 'score': 0.9997081,\n", " 'index': 434,\n", " 'word': 'With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'en',\n", " 'score': 0.99963665,\n", " 'index': 435,\n", " 'word': 'this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'en',\n", " 'score': 0.99965286,\n", " 'index': 436,\n", " 'word': 'camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'en',\n", " 'score': 0.9997528,\n", " 'index': 437,\n", " 'word': 'you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'en',\n", " 'score': 0.9997576,\n", " 'index': 438,\n", " 'word': 'can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'en',\n", " 'score': 0.9997657,\n", " 'index': 439,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'en',\n", " 
'score': 0.9997956,\n", " 'index': 440,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'en',\n", " 'score': 0.9997335,\n", " 'index': 441,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'en',\n", " 'score': 0.999653,\n", " 'index': 442,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'en',\n", " 'score': 0.9991596,\n", " 'index': 443,\n", " 'word': 'a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'en',\n", " 'score': 0.99945194,\n", " 'index': 444,\n", " 'word': 'digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'en',\n", " 'score': 0.9995933,\n", " 'index': 445,\n", " 'word': 'image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'other',\n", " 'score': 0.9999232,\n", " 'index': 446,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'other',\n", " 'score': 0.99489,\n", " 'index': 447,\n", " 'word': '3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'en',\n", " 'score': 0.9991105,\n", " 'index': 448,\n", " 'word': 'times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'en',\n", " 'score': 0.9994937,\n", " 'index': 449,\n", " 'word': 'bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'en',\n", " 'score': 0.9995734,\n", " 'index': 450,\n", " 'word': 'than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'en',\n", " 'score': 0.9995715,\n", " 'index': 451,\n", " 'word': 'the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'en',\n", " 'score': 0.9996369,\n", " 'index': 452,\n", " 'word': 'pi',\n", " 'start': 1916,\n", " 'end': 1918},\n", " {'entity': 'en',\n", " 'score': 0.9996636,\n", " 'index': 453,\n", " 'word': '##xel',\n", " 'start': 1918,\n", " 'end': 1921},\n", " {'entity': 'en',\n", " 'score': 0.99961394,\n", " 'index': 454,\n", " 'word': 'size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'en',\n", " 'score': 
0.9996786,\n", " 'index': 455,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'en',\n", " 'score': 0.9996667,\n", " 'index': 456,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'en',\n", " 'score': 0.9997652,\n", " 'index': 457,\n", " 'word': 'if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'en',\n", " 'score': 0.99967515,\n", " 'index': 458,\n", " 'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'en',\n", " 'score': 0.9996507,\n", " 'index': 459,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'en',\n", " 'score': 0.99955803,\n", " 'index': 460,\n", " 'word': 'any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'en',\n", " 'score': 0.99953544,\n", " 'index': 461,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'en',\n", " 'score': 0.99960965,\n", " 'index': 462,\n", " 'word': 'of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'en',\n", " 'score': 0.99961805,\n", " 'index': 463,\n", " 'word': 'life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'other',\n", " 'score': 0.9999225,\n", " 'index': 464,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'en',\n", " 'score': 0.9997342,\n", " 'index': 465,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'en',\n", " 'score': 0.99973625,\n", " 'index': 466,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'en',\n", " 'score': 0.9997397,\n", " 'index': 467,\n", " 'word': 'easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'en',\n", " 'score': 0.99969923,\n", " 'index': 468,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'en',\n", " 'score': 0.999673,\n", " 'index': 469,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'en',\n", " 'score': 
0.9996773,\n", " 'index': 470,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'en',\n", " 'score': 0.9995364,\n", " 'index': 471,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'other',\n", " 'score': 0.99993205,\n", " 'index': 472,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'en',\n", " 'score': 0.99950016,\n", " 'index': 473,\n", " 'word': 'What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'en',\n", " 'score': 0.999288,\n", " 'index': 474,\n", " 'word': 'the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'en',\n", " 'score': 0.99938726,\n", " 'index': 475,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'en',\n", " 'score': 0.9994843,\n", " 'index': 476,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'en',\n", " 'score': 0.99938786,\n", " 'index': 477,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'en',\n", " 'score': 0.99811554,\n", " 'index': 478,\n", " 'word': 'the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'en',\n", " 'score': 0.9786213,\n", " 'index': 479,\n", " 'word': 'but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'en',\n", " 'score': 0.99004257,\n", " 'index': 480,\n", " 'word': '##te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'en',\n", " 'score': 0.99612516,\n", " 'index': 481,\n", " 'word': 'or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'en',\n", " 'score': 0.87278074,\n", " 'index': 482,\n", " 'word': 'mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'other',\n", " 'score': 0.9999268,\n", " 'index': 483,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'en',\n", " 'score': 0.99910945,\n", " 'index': 484,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'en',\n", " 'score': 
0.9985214,\n", " 'index': 485,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'en',\n", " 'score': 0.9983,\n", " 'index': 486,\n", " 'word': 'land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'en',\n", " 'score': 0.9991359,\n", " 'index': 487,\n", " 'word': '##form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'en',\n", " 'score': 0.99933463,\n", " 'index': 488,\n", " 'word': '##s',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'en',\n", " 'score': 0.9994253,\n", " 'index': 489,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'en',\n", " 'score': 0.9994831,\n", " 'index': 490,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'en',\n", " 'score': 0.99935335,\n", " 'index': 491,\n", " 'word': 'the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'en',\n", " 'score': 0.9982742,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'en',\n", " 'score': 0.99908066,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': 'other',\n", " 'score': 0.9999201,\n", " 'index': 494,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"sagorsarker/codeswitch-spaeng-lid-lince\")\n", "\n", "model = AutoModelForTokenClassification.from_pretrained(\"sagorsarker/codeswitch-spaeng-lid-lince\")\n", "lid_model = pipeline('ner', model=model, tokenizer=tokenizer)\n", "\n", "lid_model(text)" ] }, { "cell_type": "code", "execution_count": 130, "id": "867c77f9-b9d8-4f29-b07f-cab4ee55a8f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "en 411\n", "ne 22\n", "other 
61\n", "dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entityscore
entity
en4110.996325
ne220.925739
other610.990110
\n", "
" ], "text/plain": [ " entity score\n", "entity \n", "en 411 0.996325\n", "ne 22 0.925739\n", "other 61 0.990110" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"16 sagorsarkercodeswitch-spaeng-lid-lince.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()\n", "\n", "\n", "aux.groupby(['entity']) \\\n", " .agg({'entity':'size', 'score':'mean'}) \n" ] }, { "cell_type": "markdown", "id": "88d47015-a58d-4c0b-94b2-05ac3038199a", "metadata": {}, "source": [ "## 17 jvdzwaan/ocrpostcorrection-task-1" ] }, { "cell_type": "code", "execution_count": 10, "id": "dee93220-b20b-4d65-9bd0-18342703e328", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca718fb5673b4223bddcd4cb42fd9596", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.02k [00:00 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjvdzwaan/ocrpostcorrection-task-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjvdzwaan/ocrpostcorrection-task-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:899\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 896\u001b[0m tokenizer_class_py, tokenizer_class_fast \u001b[38;5;241m=\u001b[39m TOKENIZER_MAPPING[\u001b[38;5;28mtype\u001b[39m(config)]\n\u001b[0;32m 898\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_fast \u001b[38;5;129;01mand\u001b[39;00m (use_fast \u001b[38;5;129;01mor\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m--> 899\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer_class_fast\u001b[38;5;241m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 900\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 
901\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class_py \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:2094\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[0;32m 2091\u001b[0m \u001b[38;5;66;03m# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be\u001b[39;00m\n\u001b[0;32m 2092\u001b[0m \u001b[38;5;66;03m# loaded directly from the GGUF file.\u001b[39;00m\n\u001b[0;32m 2093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gguf_file:\n\u001b[1;32m-> 2094\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 2095\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. 
If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2096\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2097\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2098\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2099\u001b[0m )\n\u001b[0;32m 2101\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m 2102\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n", "\u001b[1;31mOSError\u001b[0m: Can't load tokenizer for 'jvdzwaan/ocrpostcorrection-task-1'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'jvdzwaan/ocrpostcorrection-task-1' is the correct path to a directory containing all relevant files for a BertTokenizerFast tokenizer." 
] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"jvdzwaan/ocrpostcorrection-task-1\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"jvdzwaan/ocrpostcorrection-task-1\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "b7babbc0-5f6c-46a4-aea2-65487527ef54", "metadata": {}, "source": [ "## 18 GEOcite/AuthorParserModel" ] }, { "cell_type": "code", "execution_count": 13, "id": "a2ffcb2d-bc31-40e1-b4c6-66aeef61a845", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'B-ORG', 'score': 0.56347305, 'index': 8, 'word': 'nasa', 'start': 16, 'end': 20}, {'entity': 'I-ORG', 'score': 0.64977086, 'index': 25, 'word': 'mars', 'start': 96, 'end': 100}, {'entity': 'I-ORG', 'score': 0.5621034, 'index': 37, 'word': 'mars', 'start': 152, 'end': 156}, {'entity': 'B-ORG', 'score': 0.83589464, 'index': 59, 'word': 'viking', 'start': 240, 'end': 246}, {'entity': 'I-ORG', 'score': 0.93603927, 'index': 60, 'word': '1', 'start': 247, 'end': 248}, {'entity': 'I-ORG', 'score': 0.67972714, 'index': 61, 'word': 'spacecraft', 'start': 249, 'end': 259}, {'entity': 'B-LOC', 'score': 0.47661397, 'index': 97, 'word': 'marti', 'start': 407, 'end': 412}, {'entity': 'I-ORG', 'score': 0.5451927, 'index': 98, 'word': '##an', 'start': 412, 'end': 414}, {'entity': 'I-ORG', 'score': 0.60608196, 'index': 99, 'word': 'mesa', 'start': 415, 'end': 419}, {'entity': 'B-LOC', 'score': 0.8834184, 'index': 103, 'word': 'c', 'start': 435, 'end': 436}, {'entity': 'I-LOC', 'score': 0.7397884, 'index': 104, 'word': '##yd', 'start': 436, 'end': 438}, {'entity': 'I-LOC', 'score': 0.9083945, 'index': 105, 'word': '##onia', 'start': 438, 'end': 442}, {'entity': 'B-ORG', 'score': 0.5038258, 'index': 118, 'word': 'egypt', 'start': 496, 'end': 501}, {'entity': 'I-ORG', 'score': 
0.64455724, 'index': 119, 'word': '##ion', 'start': 501, 'end': 504}, {'entity': 'I-ORG', 'score': 0.6576205, 'index': 120, 'word': 'pha', 'start': 505, 'end': 508}, {'entity': 'I-ORG', 'score': 0.6960956, 'index': 121, 'word': '##rao', 'start': 508, 'end': 511}, {'entity': 'I-ORG', 'score': 0.61072016, 'index': 122, 'word': '##h', 'start': 511, 'end': 512}, {'entity': 'B-ORG', 'score': 0.7676731, 'index': 190, 'word': 'nasa', 'start': 801, 'end': 805}, {'entity': 'B-LOC', 'score': 0.37248164, 'index': 201, 'word': 'mars', 'start': 843, 'end': 847}, {'entity': 'I-ORG', 'score': 0.6403719, 'index': 211, 'word': 'mars', 'start': 875, 'end': 879}, {'entity': 'B-ORG', 'score': 0.6982709, 'index': 233, 'word': 'haunted', 'start': 972, 'end': 979}, {'entity': 'I-ORG', 'score': 0.8855793, 'index': 234, 'word': 'gr', 'start': 980, 'end': 982}, {'entity': 'I-ORG', 'score': 0.89122075, 'index': 235, 'word': '##oce', 'start': 982, 'end': 985}, {'entity': 'I-ORG', 'score': 0.91083044, 'index': 236, 'word': '##ry', 'start': 985, 'end': 987}, {'entity': 'I-ORG', 'score': 0.78272146, 'index': 237, 'word': 'store', 'start': 988, 'end': 993}, {'entity': 'I-ORG', 'score': 0.39987472, 'index': 257, 'word': 'mars', 'start': 1088, 'end': 1092}, {'entity': 'B-ORG', 'score': 0.37609065, 'index': 272, 'word': 'defenders', 'start': 1152, 'end': 1161}, {'entity': 'I-ORG', 'score': 0.49420744, 'index': 273, 'word': 'of', 'start': 1162, 'end': 1164}, {'entity': 'I-ORG', 'score': 0.65821886, 'index': 274, 'word': 'the', 'start': 1165, 'end': 1168}, {'entity': 'I-ORG', 'score': 0.626755, 'index': 275, 'word': 'nasa', 'start': 1169, 'end': 1173}, {'entity': 'I-ORG', 'score': 0.6965647, 'index': 281, 'word': 'civilization', 'start': 1204, 'end': 1216}, {'entity': 'I-ORG', 'score': 0.48566574, 'index': 282, 'word': 'on', 'start': 1217, 'end': 1219}, {'entity': 'I-ORG', 'score': 0.70331687, 'index': 283, 'word': 'mars', 'start': 1220, 'end': 1224}, {'entity': 'B-PER', 'score': 0.9669894, 'index': 
308, 'word': 'michael', 'start': 1312, 'end': 1319}, {'entity': 'I-PER', 'score': 0.94802105, 'index': 309, 'word': 'mali', 'start': 1320, 'end': 1324}, {'entity': 'I-PER', 'score': 0.9340164, 'index': 310, 'word': '##n', 'start': 1324, 'end': 1325}, {'entity': 'B-ORG', 'score': 0.8970071, 'index': 313, 'word': 'mars', 'start': 1334, 'end': 1338}, {'entity': 'I-ORG', 'score': 0.93871564, 'index': 314, 'word': 'orbite', 'start': 1339, 'end': 1345}, {'entity': 'I-ORG', 'score': 0.9164465, 'index': 315, 'word': '##r', 'start': 1345, 'end': 1346}, {'entity': 'I-ORG', 'score': 0.7809999, 'index': 316, 'word': 'camera', 'start': 1347, 'end': 1353}, {'entity': 'B-ORG', 'score': 0.4608157, 'index': 330, 'word': 'viking', 'start': 1419, 'end': 1425}, {'entity': 'B-ORG', 'score': 0.6714569, 'index': 420, 'word': 'absolute', 'start': 1803, 'end': 1811}, {'entity': 'I-ORG', 'score': 0.76191556, 'index': 421, 'word': 'maximum', 'start': 1812, 'end': 1819}, {'entity': 'I-ORG', 'score': 0.67246604, 'index': 422, 'word': 'revolution', 'start': 1820, 'end': 1830}, {'entity': 'B-LOC', 'score': 0.6010912, 'index': 468, 'word': 'butte', 'start': 2041, 'end': 2046}, {'entity': 'I-ORG', 'score': 0.46326715, 'index': 469, 'word': 'or', 'start': 2047, 'end': 2049}, {'entity': 'I-ORG', 'score': 0.45148414, 'index': 470, 'word': 'mesa', 'start': 2050, 'end': 2054}, {'entity': 'B-LOC', 'score': 0.8956182, 'index': 480, 'word': 'american', 'start': 2094, 'end': 2102}, {'entity': 'I-LOC', 'score': 0.9263237, 'index': 481, 'word': 'west', 'start': 2103, 'end': 2107}]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"GEOcite/AuthorParserModel\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"GEOcite/AuthorParserModel\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "code", "execution_count": 14, "id": 
"b09510d4-fdb1-44a8-aa54-34c809af5aee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 5\n", "B-ORG 9\n", "B-PER 1\n", "I-LOC 3\n", "I-ORG 29\n", "I-PER 2\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC american 1\n", " butte 1\n", " c 1\n", " mars 1\n", " marti 1\n", "B-ORG absolute 1\n", " defenders 1\n", " egypt 1\n", " haunted 1\n", " mars 1\n", " nasa 2\n", " viking 2\n", "B-PER michael 1\n", "I-LOC ##onia 1\n", " ##yd 1\n", " west 1\n", "I-ORG ##an 1\n", " ##h 1\n", " ##ion 1\n", " ##oce 1\n", " ##r 1\n", " ##rao 1\n", " ##ry 1\n", " 1 1\n", " camera 1\n", " civilization 1\n", " gr 1\n", " mars 5\n", " maximum 1\n", " mesa 2\n", " nasa 1\n", " of 1\n", " on 1\n", " or 1\n", " orbite 1\n", " pha 1\n", " revolution 1\n", " spacecraft 1\n", " store 1\n", " the 1\n", "I-PER ##n 1\n", " mali 1\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "with open(\"18 GEOciteAuthorParserModel.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "116981e0-56cb-4094-99e4-e8b99f62597d", "metadata": {}, "source": [ "## 19 mbruton/spa_en_XLM-R" ] }, { "cell_type": "code", "execution_count": 16, "id": "7220081f-0732-4693-8a92-7b93d17b259a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'r0:arg1|tem', 'score': 0.9885584, 'index': 4, 'word': '▁you', 'start': 6, 'end': 10}, {'entity': 'r0:root', 'score': 0.9828293, 'index': 5, 'word': \"'\", 'start': 10, 'end': 11}, {'entity': 'r0:root', 'score': 0.9987388, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'r0:arg2|atr', 'score': 0.9798707, 'index': 9, 'word': '▁scientist', 'start': 20, 'end': 30}, {'entity': 'r1:arg1|tem', 'score': 0.9950965, 
'index': 11, 'word': '▁you', 'start': 31, 'end': 35}, {'entity': 'r1:root', 'score': 0.9981749, 'index': 13, 'word': '▁be', 'start': 42, 'end': 45}, {'entity': 'r1:arg2|atr', 'score': 0.98995215, 'index': 14, 'word': '▁able', 'start': 45, 'end': 50}, {'entity': 'r2:root', 'score': 0.99103266, 'index': 16, 'word': '▁tell', 'start': 53, 'end': 58}, {'entity': 'r2:arg2|ben', 'score': 0.9612953, 'index': 17, 'word': '▁me', 'start': 58, 'end': 61}, {'entity': 'r2:arg1|pat', 'score': 0.98190963, 'index': 20, 'word': '▁story', 'start': 71, 'end': 77}, {'entity': 'r3:arg1|tem', 'score': 0.98912394, 'index': 27, 'word': '▁which', 'start': 101, 'end': 107}, {'entity': 'r3:argM|adv', 'score': 0.60085195, 'index': 28, 'word': '▁obviously', 'start': 107, 'end': 117}, {'entity': 'r3:root', 'score': 0.9985306, 'index': 29, 'word': '▁is', 'start': 117, 'end': 120}, {'entity': 'r3:arg2|atr', 'score': 0.9856076, 'index': 30, 'word': '▁evidence', 'start': 120, 'end': 129}, {'entity': 'r4:root', 'score': 0.55564994, 'index': 32, 'word': '▁there', 'start': 134, 'end': 140}, {'entity': 'r4:root', 'score': 0.9971462, 'index': 33, 'word': '▁is', 'start': 140, 'end': 143}, {'entity': 'r4:arg1|tem', 'score': 0.94116557, 'index': 34, 'word': '▁life', 'start': 143, 'end': 148}, {'entity': 'r4:argM|loc', 'score': 0.54012036, 'index': 35, 'word': '▁on', 'start': 148, 'end': 151}, {'entity': 'r6:arg1|pat', 'score': 0.69593036, 'index': 41, 'word': '▁face', 'start': 170, 'end': 175}, {'entity': 'r6:root', 'score': 0.69639283, 'index': 43, 'word': '▁created', 'start': 179, 'end': 187}, {'entity': 'r6:arg0|agt', 'score': 0.64183134, 'index': 44, 'word': '▁by', 'start': 187, 'end': 190}, {'entity': 'r7:arg0|agt', 'score': 0.55535495, 'index': 61, 'word': 'craft', 'start': 254, 'end': 259}, {'entity': 'r8:root', 'score': 0.47476873, 'index': 63, 'word': '▁circ', 'start': 263, 'end': 268}, {'entity': 'r8:root', 'score': 0.47575742, 'index': 64, 'word': 'ling', 'start': 268, 'end': 272}, {'entity': 
'r8:arg1|pat', 'score': 0.31078094, 'index': 66, 'word': '▁planet', 'start': 276, 'end': 283}, {'entity': 'r8:root', 'score': 0.31837395, 'index': 68, 'word': '▁sna', 'start': 284, 'end': 288}, {'entity': 'r9:root', 'score': 0.31875014, 'index': 69, 'word': 'pping', 'start': 288, 'end': 293}, {'entity': 'r8:arg1|pat', 'score': 0.286856, 'index': 70, 'word': '▁photos', 'start': 293, 'end': 300}, {'entity': 'r8:arg0|agt', 'score': 0.21294405, 'index': 73, 'word': '▁it', 'start': 306, 'end': 309}, {'entity': 'r9:root', 'score': 0.3105045, 'index': 74, 'word': '▁spot', 'start': 309, 'end': 314}, {'entity': 'r9:root', 'score': 0.31755808, 'index': 75, 'word': 'ted', 'start': 314, 'end': 317}, {'entity': 'r9:arg1|pat', 'score': 0.22570874, 'index': 79, 'word': '▁like', 'start': 329, 'end': 334}, {'entity': 'r9:arg1|pat', 'score': 0.2306108, 'index': 80, 'word': 'ness', 'start': 334, 'end': 338}, {'entity': 'r9:arg0|agt', 'score': 0.19357839, 'index': 87, 'word': '▁scientist', 'start': 358, 'end': 368}, {'entity': 'r9:root', 'score': 0.2762144, 'index': 89, 'word': '▁figure', 'start': 369, 'end': 376}, {'entity': 'r9:root', 'score': 0.22951078, 'index': 90, 'word': 'd', 'start': 376, 'end': 377}, {'entity': 'r8:arg1|tem', 'score': 0.078046024, 'index': 93, 'word': '▁it', 'start': 386, 'end': 389}, {'entity': 'r9:root', 'score': 0.15313852, 'index': 94, 'word': '▁was', 'start': 389, 'end': 393}, {'entity': 'r9:arg1|pat', 'score': 0.045638904, 'index': 99, 'word': '▁mesa', 'start': 414, 'end': 419}, {'entity': 'r9:arg1|tem', 'score': 0.058356848, 'index': 109, 'word': '▁one', 'start': 453, 'end': 457}, {'entity': 'r9:root', 'score': 0.1325599, 'index': 110, 'word': '▁had', 'start': 457, 'end': 461}, {'entity': 'r9:arg1|pat', 'score': 0.06691175, 'index': 111, 'word': '▁shadow', 'start': 461, 'end': 468}, {'entity': 'r9:arg0|agt', 'score': 0.06208066, 'index': 113, 'word': '▁that', 'start': 469, 'end': 474}, {'entity': 'r9:arg1|pat', 'score': 0.0632269, 'index': 115, 'word': 
'▁it', 'start': 479, 'end': 482}, {'entity': 'r9:root', 'score': 0.099622406, 'index': 116, 'word': '▁look', 'start': 482, 'end': 487}, {'entity': 'r9:arg1|pat', 'score': 0.034427807, 'index': 117, 'word': '▁like', 'start': 487, 'end': 492}, {'entity': 'r8:argM|tmp', 'score': 0.06514092, 'index': 128, 'word': '▁later', 'start': 527, 'end': 533}, {'entity': 'r9:arg0|agt', 'score': 0.09965178, 'index': 130, 'word': '▁we', 'start': 534, 'end': 537}, {'entity': 'r11:root', 'score': 0.08552331, 'index': 131, 'word': '▁reveal', 'start': 537, 'end': 544}, {'entity': 'r11:root', 'score': 0.07387665, 'index': 132, 'word': 'ed', 'start': 544, 'end': 546}, {'entity': 'r9:arg1|pat', 'score': 0.12956654, 'index': 134, 'word': '▁image', 'start': 550, 'end': 556}, {'entity': 'r8:argM|adv', 'score': 0.038444836, 'index': 135, 'word': '▁for', 'start': 556, 'end': 560}, {'entity': 'r9:arg0|agt', 'score': 0.047709465, 'index': 141, 'word': '▁we', 'start': 576, 'end': 579}, {'entity': 'r10:root', 'score': 0.077698395, 'index': 142, 'word': '▁made', 'start': 579, 'end': 584}, {'entity': 'r9:arg1|pat', 'score': 0.04974987, 'index': 147, 'word': '▁it', 'start': 602, 'end': 605}, {'entity': 'r11:root', 'score': 0.073394544, 'index': 148, 'word': '▁was', 'start': 605, 'end': 609}, {'entity': 'r9:arg1|pat', 'score': 0.04125579, 'index': 151, 'word': '▁rock', 'start': 616, 'end': 621}, {'entity': 'r9:arg1|pat', 'score': 0.039437402, 'index': 152, 'word': '▁formation', 'start': 621, 'end': 631}, {'entity': 'r8:arg1|tem', 'score': 0.044781037, 'index': 153, 'word': '▁that', 'start': 631, 'end': 636}, {'entity': 'r8:argM|adv', 'score': 0.039434977, 'index': 154, 'word': '▁just', 'start': 636, 'end': 641}, {'entity': 'r11:root', 'score': 0.0661228, 'index': 155, 'word': '▁rese', 'start': 641, 'end': 646}, {'entity': 'r11:root', 'score': 0.0739401, 'index': 156, 'word': 'mble', 'start': 646, 'end': 650}, {'entity': 'r11:root', 'score': 0.06486134, 'index': 157, 'word': 'd', 'start': 650, 'end': 
651}, {'entity': 'r9:arg1|pat', 'score': 0.046511702, 'index': 160, 'word': '▁head', 'start': 659, 'end': 664}, {'entity': 'r11:root', 'score': 0.0790022, 'index': 169, 'word': '▁for', 'start': 692, 'end': 696}, {'entity': 'r10:root', 'score': 0.077606075, 'index': 170, 'word': 'med', 'start': 696, 'end': 699}, {'entity': 'r9:arg0|agt', 'score': 0.040013608, 'index': 171, 'word': '▁by', 'start': 699, 'end': 702}, {'entity': 'r9:arg0|agt', 'score': 0.08678443, 'index': 175, 'word': '▁We', 'start': 711, 'end': 714}, {'entity': 'r9:root', 'score': 0.10425882, 'index': 177, 'word': '▁announced', 'start': 719, 'end': 729}, {'entity': 'r9:arg1|pat', 'score': 0.13532344, 'index': 178, 'word': '▁it', 'start': 729, 'end': 732}, {'entity': 'r9:arg0|agt', 'score': 0.03753009, 'index': 180, 'word': '▁we', 'start': 740, 'end': 743}, {'entity': 'r9:root', 'score': 0.07860999, 'index': 181, 'word': '▁thought', 'start': 743, 'end': 751}, {'entity': 'r9:arg1|pat', 'score': 0.058518294, 'index': 182, 'word': '▁it', 'start': 751, 'end': 754}, {'entity': 'r9:root', 'score': 0.15334934, 'index': 184, 'word': '▁be', 'start': 760, 'end': 763}, {'entity': 'r8:arg2|atr', 'score': 0.077581845, 'index': 187, 'word': '▁way', 'start': 770, 'end': 774}, {'entity': 'r9:root', 'score': 0.1817208, 'index': 189, 'word': '▁engage', 'start': 777, 'end': 784}, {'entity': 'r8:arg1|tem', 'score': 0.06929828, 'index': 191, 'word': '▁public', 'start': 788, 'end': 795}, {'entity': 'r8:arg1|tem', 'score': 0.07084324, 'index': 209, 'word': '▁it', 'start': 853, 'end': 856}, {'entity': 'r9:root', 'score': 0.097537614, 'index': 210, 'word': '▁did', 'start': 856, 'end': 860}, {'entity': 'r8:arg1|tem', 'score': 0.22996224, 'index': 213, 'word': '▁face', 'start': 866, 'end': 871}, {'entity': 'r8:argM|tmp', 'score': 0.15224062, 'index': 216, 'word': '▁soon', 'start': 879, 'end': 884}, {'entity': 'r9:root', 'score': 0.14052552, 'index': 217, 'word': '▁became', 'start': 884, 'end': 891}, {'entity': 'r7:arg2|atr', 
'score': 0.13227668, 'index': 220, 'word': '▁icon', 'start': 897, 'end': 902}, {'entity': 'r2:root', 'score': 0.34378532, 'index': 222, 'word': '▁shot', 'start': 903, 'end': 908}, {'entity': 'r4:argM|loc', 'score': 0.28516474, 'index': 223, 'word': '▁in', 'start': 908, 'end': 911}, {'entity': 'r3:root', 'score': 0.45903492, 'index': 226, 'word': '▁appeared', 'start': 919, 'end': 928}, {'entity': 'r4:arg2|loc', 'score': 0.18714885, 'index': 227, 'word': '▁in', 'start': 928, 'end': 931}, {'entity': 'r3:argM|tmp', 'score': 0.3332829, 'index': 248, 'word': '▁for', 'start': 1008, 'end': 1012}, {'entity': 'r4:arg0|agt', 'score': 0.4323846, 'index': 253, 'word': '▁people', 'start': 1027, 'end': 1034}, {'entity': 'r4:root', 'score': 0.6460269, 'index': 254, 'word': '▁thought', 'start': 1034, 'end': 1042}, {'entity': 'r5:arg1|tem', 'score': 0.7946849, 'index': 257, 'word': '▁land', 'start': 1054, 'end': 1059}, {'entity': 'r5:arg1|tem', 'score': 0.7655751, 'index': 258, 'word': 'form', 'start': 1059, 'end': 1063}, {'entity': 'r5:root', 'score': 0.87633103, 'index': 259, 'word': '▁was', 'start': 1063, 'end': 1067}, {'entity': 'r5:arg2|atr', 'score': 0.82787794, 'index': 260, 'word': '▁evidence', 'start': 1067, 'end': 1076}, {'entity': 'r5:arg0|agt', 'score': 0.4799223, 'index': 269, 'word': '▁scientist', 'start': 1105, 'end': 1115}, {'entity': 'r5:root', 'score': 0.8739596, 'index': 271, 'word': '▁wanted', 'start': 1116, 'end': 1123}, {'entity': 'r5:root', 'score': 0.4174833, 'index': 273, 'word': '▁hi', 'start': 1126, 'end': 1129}, {'entity': 'r6:arg1|pat', 'score': 0.72568583, 'index': 275, 'word': '▁it', 'start': 1131, 'end': 1134}, {'entity': 'r5:argM|adv', 'score': 0.48375428, 'index': 278, 'word': '▁really', 'start': 1139, 'end': 1146}, {'entity': 'r5:arg0|agt', 'score': 0.5601293, 'index': 281, 'word': '▁defender', 'start': 1151, 'end': 1160}, {'entity': 'r4:root', 'score': 0.5749663, 'index': 287, 'word': '▁wish', 'start': 1180, 'end': 1185}, {'entity': 'r5:root', 
'score': 0.5627814, 'index': 289, 'word': '▁was', 'start': 1191, 'end': 1195}, {'entity': 'r5:arg1|tem', 'score': 0.7166488, 'index': 292, 'word': '▁civiliza', 'start': 1203, 'end': 1212}, {'entity': 'r5:arg1|tem', 'score': 0.45804867, 'index': 293, 'word': 'tion', 'start': 1212, 'end': 1216}, {'entity': 'r5:arg0|agt', 'score': 0.82322997, 'index': 297, 'word': '▁We', 'start': 1225, 'end': 1228}, {'entity': 'r5:root', 'score': 0.84057826, 'index': 298, 'word': '▁decided', 'start': 1228, 'end': 1236}, {'entity': 'r5:root', 'score': 0.49023774, 'index': 300, 'word': '▁take', 'start': 1239, 'end': 1244}, {'entity': 'r6:arg1|pat', 'score': 0.4983117, 'index': 302, 'word': '▁shot', 'start': 1252, 'end': 1257}, {'entity': 'r6:arg1|tem', 'score': 0.7063215, 'index': 307, 'word': '▁we', 'start': 1275, 'end': 1278}, {'entity': 'r6:root', 'score': 0.58319706, 'index': 308, 'word': '▁were', 'start': 1278, 'end': 1283}, {'entity': 'r6:arg2|atr', 'score': 0.6607443, 'index': 312, 'word': '▁wrong', 'start': 1286, 'end': 1292}, {'entity': 'r6:argM|tmp', 'score': 0.4239783, 'index': 314, 'word': '▁on', 'start': 1293, 'end': 1296}, {'entity': 'r7:arg0|agt', 'score': 0.61083174, 'index': 319, 'word': '▁Michael', 'start': 1311, 'end': 1319}, {'entity': 'r7:arg0|agt', 'score': 0.43220523, 'index': 320, 'word': '▁Malin', 'start': 1319, 'end': 1325}, {'entity': 'r7:root', 'score': 0.568414, 'index': 329, 'word': '▁took', 'start': 1358, 'end': 1363}, {'entity': 'r6:arg1|pat', 'score': 0.70329344, 'index': 331, 'word': '▁picture', 'start': 1365, 'end': 1373}, {'entity': 'r7:arg1|tem', 'score': 0.4258404, 'index': 332, 'word': '▁that', 'start': 1373, 'end': 1378}, {'entity': 'r7:root', 'score': 0.72852856, 'index': 333, 'word': '▁was', 'start': 1378, 'end': 1382}, {'entity': 'r7:arg2|atr', 'score': 0.4354887, 'index': 336, 'word': '▁sharp', 'start': 1392, 'end': 1398}, {'entity': 'r7:arg2|atr', 'score': 0.3854383, 'index': 337, 'word': 'er', 'start': 1398, 'end': 1400}, {'entity': 
'r8:root', 'score': 0.4831441, 'index': 344, 'word': '▁reveal', 'start': 1433, 'end': 1440}, {'entity': 'r8:root', 'score': 0.3593413, 'index': 345, 'word': 'ing', 'start': 1440, 'end': 1443}, {'entity': 'r9:arg1|pat', 'score': 0.2637543, 'index': 348, 'word': '▁land', 'start': 1453, 'end': 1458}, {'entity': 'r8:arg1|tem', 'score': 0.20474246, 'index': 351, 'word': '▁which', 'start': 1463, 'end': 1469}, {'entity': 'r9:root', 'score': 0.3001293, 'index': 352, 'word': '▁meant', 'start': 1469, 'end': 1475}, {'entity': 'r9:arg1|pat', 'score': 0.060182273, 'index': 355, 'word': '▁monument', 'start': 1484, 'end': 1493}, {'entity': 'r8:arg1|tem', 'score': 0.17025372, 'index': 360, 'word': '▁picture', 'start': 1504, 'end': 1512}, {'entity': 'r9:root', 'score': 0.30658206, 'index': 361, 'word': '▁wasn', 'start': 1512, 'end': 1517}, {'entity': 'r8:arg2|atr', 'score': 0.1777854, 'index': 365, 'word': '▁clear', 'start': 1524, 'end': 1530}, {'entity': 'r9:arg0|agt', 'score': 0.04626326, 'index': 369, 'word': '▁which', 'start': 1538, 'end': 1544}, {'entity': 'r9:root', 'score': 0.13540097, 'index': 371, 'word': '▁mean', 'start': 1550, 'end': 1555}, {'entity': 'r9:arg1|pat', 'score': 0.12729116, 'index': 373, 'word': '▁mark', 'start': 1561, 'end': 1566}, {'entity': 'r9:arg1|pat', 'score': 0.11717077, 'index': 374, 'word': 'ings', 'start': 1566, 'end': 1570}, {'entity': 'r9:root', 'score': 0.15416586, 'index': 376, 'word': '▁hidden', 'start': 1575, 'end': 1582}, {'entity': 'r9:arg0|agt', 'score': 0.0479266, 'index': 377, 'word': '▁by', 'start': 1582, 'end': 1585}, {'entity': 'r8:arg1|tem', 'score': 0.06482706, 'index': 386, 'word': '▁rumor', 'start': 1609, 'end': 1615}, {'entity': 'r9:root', 'score': 0.09473133, 'index': 387, 'word': '▁started', 'start': 1615, 'end': 1623}, {'entity': 'r9:root', 'score': 0.08225819, 'index': 391, 'word': '▁prove', 'start': 1631, 'end': 1637}, {'entity': 'r9:arg1|pat', 'score': 0.11032233, 'index': 392, 'word': '▁them', 'start': 1637, 'end': 1642}, 
{'entity': 'r9:arg1|pat', 'score': 0.039644323, 'index': 393, 'word': '▁wrong', 'start': 1642, 'end': 1648}, {'entity': 'r8:argM|tmp', 'score': 0.058614645, 'index': 394, 'word': '▁on', 'start': 1648, 'end': 1651}, {'entity': 'r9:arg0|agt', 'score': 0.09158646, 'index': 399, 'word': '▁we', 'start': 1665, 'end': 1668}, {'entity': 'r9:root', 'score': 0.097704664, 'index': 400, 'word': '▁decided', 'start': 1668, 'end': 1676}, {'entity': 'r9:root', 'score': 0.06539654, 'index': 402, 'word': '▁take', 'start': 1679, 'end': 1684}, {'entity': 'r9:arg1|pat', 'score': 0.103520945, 'index': 404, 'word': '▁picture', 'start': 1692, 'end': 1700}, {'entity': 'r9:arg1|pat', 'score': 0.051061057, 'index': 408, 'word': '▁it', 'start': 1713, 'end': 1716}, {'entity': 'r10:root', 'score': 0.08168564, 'index': 409, 'word': '▁was', 'start': 1716, 'end': 1720}, {'entity': 'r9:arg1|pat', 'score': 0.03991396, 'index': 414, 'word': '▁day', 'start': 1739, 'end': 1743}, {'entity': 'r9:arg0|agt', 'score': 0.07429483, 'index': 416, 'word': '▁Malin', 'start': 1744, 'end': 1750}, {'entity': 'r9:arg0|agt', 'score': 0.0536705, 'index': 419, 'word': '▁team', 'start': 1752, 'end': 1757}, {'entity': 'r10:root', 'score': 0.08358262, 'index': 420, 'word': '▁capture', 'start': 1757, 'end': 1765}, {'entity': 'r10:root', 'score': 0.079094924, 'index': 421, 'word': 'd', 'start': 1765, 'end': 1766}, {'entity': 'r9:arg1|pat', 'score': 0.11371465, 'index': 424, 'word': '▁photo', 'start': 1777, 'end': 1783}, {'entity': 'r11:root', 'score': 0.0549004, 'index': 425, 'word': '▁using', 'start': 1783, 'end': 1789}, {'entity': 'r9:arg1|pat', 'score': 0.062476702, 'index': 427, 'word': '▁camera', 'start': 1793, 'end': 1800}, {'entity': 'r9:arg1|pat', 'score': 0.023913436, 'index': 434, 'word': '▁With', 'start': 1831, 'end': 1836}, {'entity': 'r9:arg0|agt', 'score': 0.04540858, 'index': 437, 'word': '▁you', 'start': 1848, 'end': 1852}, {'entity': 'r9:root', 'score': 0.1428813, 'index': 439, 'word': '▁discern', 'start': 
1856, 'end': 1864}, {'entity': 'r9:arg1|pat', 'score': 0.12133165, 'index': 440, 'word': '▁things', 'start': 1864, 'end': 1871}, {'entity': 'r8:argM|loc', 'score': 0.03498705, 'index': 441, 'word': '▁in', 'start': 1871, 'end': 1874}, {'entity': 'r9:arg1|pat', 'score': 0.03346169, 'index': 453, 'word': '▁which', 'start': 1926, 'end': 1932}, {'entity': 'r9:root', 'score': 0.12779085, 'index': 454, 'word': '▁means', 'start': 1932, 'end': 1938}, {'entity': 'r9:root', 'score': 0.057673983, 'index': 456, 'word': '▁there', 'start': 1941, 'end': 1947}, {'entity': 'r9:root', 'score': 0.14751895, 'index': 457, 'word': '▁were', 'start': 1947, 'end': 1952}, {'entity': 'r9:arg1|pat', 'score': 0.07470571, 'index': 459, 'word': '▁sign', 'start': 1956, 'end': 1961}, {'entity': 'r9:arg0|agt', 'score': 0.054813575, 'index': 464, 'word': '▁you', 'start': 1971, 'end': 1975}, {'entity': 'r8:argM|adv', 'score': 0.036606755, 'index': 466, 'word': '▁easily', 'start': 1981, 'end': 1988}, {'entity': 'r9:root', 'score': 0.17583221, 'index': 467, 'word': '▁see', 'start': 1988, 'end': 1992}, {'entity': 'r9:arg2|atr', 'score': 0.052657988, 'index': 468, 'word': '▁what', 'start': 1992, 'end': 1997}, {'entity': 'r8:arg1|tem', 'score': 0.08003322, 'index': 469, 'word': '▁they', 'start': 1997, 'end': 2002}, {'entity': 'r9:root', 'score': 0.13172248, 'index': 470, 'word': '▁were', 'start': 2002, 'end': 2007}, {'entity': 'r9:arg1|pat', 'score': 0.10661528, 'index': 472, 'word': '▁What', 'start': 2008, 'end': 2013}, {'entity': 'r9:arg0|agt', 'score': 0.059802476, 'index': 474, 'word': '▁picture', 'start': 2017, 'end': 2025}, {'entity': 'r9:root', 'score': 0.14850138, 'index': 475, 'word': '▁showed', 'start': 2025, 'end': 2032}, {'entity': 'r9:root', 'score': 0.23177616, 'index': 476, 'word': '▁was', 'start': 2032, 'end': 2036}, {'entity': 'r8:arg2|atr', 'score': 0.21636702, 'index': 478, 'word': '▁but', 'start': 2040, 'end': 2044}, {'entity': 'r7:arg2|atr', 'score': 0.2325799, 'index': 479, 'word': 
'te', 'start': 2044, 'end': 2046}, {'entity': 'r7:arg1|tem', 'score': 0.26516578, 'index': 483, 'word': '▁which', 'start': 2055, 'end': 2061}, {'entity': 'r7:root', 'score': 0.20337786, 'index': 484, 'word': '▁are', 'start': 2061, 'end': 2065}, {'entity': 'r7:arg2|atr', 'score': 0.2973422, 'index': 485, 'word': '▁land', 'start': 2065, 'end': 2070}, {'entity': 'r7:arg2|atr', 'score': 0.31532818, 'index': 486, 'word': 'form', 'start': 2070, 'end': 2074}, {'entity': 'r7:arg2|atr', 'score': 0.2744848, 'index': 487, 'word': 's', 'start': 2074, 'end': 2075}]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_en_XLM-R\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_en_XLM-R\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "code", "execution_count": 23, "id": "4727c78d-e205-48e0-b30a-3b475a3060c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "entity word \n", "r0:arg1|tem ▁you 1\n", "r0:arg2|atr ▁scientist 1\n", "r0:root 1\n", " re 1\n", "r10:root d 1\n", " ..\n", "r9:root ▁there 1\n", " ▁thought 1\n", " ▁was 2\n", " ▁wasn 1\n", " ▁were 2\n", "Length: 168, dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "with open(\"19 mbrutonspa_en_XLM-R.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "#print(aux.groupby(['entity']).size())\n", "aux2 = aux.groupby(['entity', 'word']).size()\n", "aux2\n" ] }, { "cell_type": "code", "execution_count": 27, "id": "172bc82e-129d-40c1-9f33-0a80eaa9237d", "metadata": {}, "outputs": [], "source": [ "pd.set_option('display.max_rows', None) " ] }, { "cell_type": "code", "execution_count": 28, "id": "c4c6dff8-ce18-47e3-8427-e74ca7f4307f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "entity word \n", 
"r0:arg1|tem ▁you 1\n", "r0:arg2|atr ▁scientist 1\n", "r0:root 1\n", " re 1\n", "r10:root d 1\n", " med 1\n", " ▁capture 1\n", " ▁made 1\n", " ▁was 1\n", "r11:root d 1\n", " ed 1\n", " mble 1\n", " ▁for 1\n", " ▁rese 1\n", " ▁reveal 1\n", " ▁using 1\n", " ▁was 1\n", "r1:arg1|tem ▁you 1\n", "r1:arg2|atr ▁able 1\n", "r1:root ▁be 1\n", "r2:arg1|pat ▁story 1\n", "r2:arg2|ben ▁me 1\n", "r2:root ▁shot 1\n", " ▁tell 1\n", "r3:arg1|tem ▁which 1\n", "r3:arg2|atr ▁evidence 1\n", "r3:argM|adv ▁obviously 1\n", "r3:argM|tmp ▁for 1\n", "r3:root ▁appeared 1\n", " ▁is 1\n", "r4:arg0|agt ▁people 1\n", "r4:arg1|tem ▁life 1\n", "r4:arg2|loc ▁in 1\n", "r4:argM|loc ▁in 1\n", " ▁on 1\n", "r4:root ▁is 1\n", " ▁there 1\n", " ▁thought 1\n", " ▁wish 1\n", "r5:arg0|agt ▁We 1\n", " ▁defender 1\n", " ▁scientist 1\n", "r5:arg1|tem form 1\n", " tion 1\n", " ▁civiliza 1\n", " ▁land 1\n", "r5:arg2|atr ▁evidence 1\n", "r5:argM|adv ▁really 1\n", "r5:root ▁decided 1\n", " ▁hi 1\n", " ▁take 1\n", " ▁wanted 1\n", " ▁was 2\n", "r6:arg0|agt ▁by 1\n", "r6:arg1|pat ▁face 1\n", " ▁it 1\n", " ▁picture 1\n", " ▁shot 1\n", "r6:arg1|tem ▁we 1\n", "r6:arg2|atr ▁wrong 1\n", "r6:argM|tmp ▁on 1\n", "r6:root ▁created 1\n", " ▁were 1\n", "r7:arg0|agt craft 1\n", " ▁Malin 1\n", " ▁Michael 1\n", "r7:arg1|tem ▁that 1\n", " ▁which 1\n", "r7:arg2|atr er 1\n", " form 1\n", " s 1\n", " te 1\n", " ▁icon 1\n", " ▁land 1\n", " ▁sharp 1\n", "r7:root ▁are 1\n", " ▁took 1\n", " ▁was 1\n", "r8:arg0|agt ▁it 1\n", "r8:arg1|pat ▁photos 1\n", " ▁planet 1\n", "r8:arg1|tem ▁face 1\n", " ▁it 2\n", " ▁picture 1\n", " ▁public 1\n", " ▁rumor 1\n", " ▁that 1\n", " ▁they 1\n", " ▁which 1\n", "r8:arg2|atr ▁but 1\n", " ▁clear 1\n", " ▁way 1\n", "r8:argM|adv ▁easily 1\n", " ▁for 1\n", " ▁just 1\n", "r8:argM|loc ▁in 1\n", "r8:argM|tmp ▁later 1\n", " ▁on 1\n", " ▁soon 1\n", "r8:root ing 1\n", " ling 1\n", " ▁circ 1\n", " ▁reveal 1\n", " ▁sna 1\n", "r9:arg0|agt ▁Malin 1\n", " ▁We 1\n", " ▁by 2\n", " ▁picture 1\n", " ▁scientist 1\n", " ▁team 1\n", " 
▁that 1\n", " ▁we 4\n", " ▁which 1\n", " ▁you 2\n", "r9:arg1|pat ings 1\n", " ness 1\n", " ▁What 1\n", " ▁With 1\n", " ▁camera 1\n", " ▁day 1\n", " ▁formation 1\n", " ▁head 1\n", " ▁image 1\n", " ▁it 5\n", " ▁land 1\n", " ▁like 2\n", " ▁mark 1\n", " ▁mesa 1\n", " ▁monument 1\n", " ▁photo 1\n", " ▁picture 1\n", " ▁rock 1\n", " ▁shadow 1\n", " ▁sign 1\n", " ▁them 1\n", " ▁things 1\n", " ▁which 1\n", " ▁wrong 1\n", "r9:arg1|tem ▁one 1\n", "r9:arg2|atr ▁what 1\n", "r9:root d 1\n", " pping 1\n", " ted 1\n", " ▁announced 1\n", " ▁be 1\n", " ▁became 1\n", " ▁decided 1\n", " ▁did 1\n", " ▁discern 1\n", " ▁engage 1\n", " ▁figure 1\n", " ▁had 1\n", " ▁hidden 1\n", " ▁look 1\n", " ▁mean 1\n", " ▁means 1\n", " ▁meant 1\n", " ▁prove 1\n", " ▁see 1\n", " ▁showed 1\n", " ▁spot 1\n", " ▁started 1\n", " ▁take 1\n", " ▁there 1\n", " ▁thought 1\n", " ▁was 2\n", " ▁wasn 1\n", " ▁were 2\n", "dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aux2" ] }, { "cell_type": "markdown", "id": "747b229b-c017-4db6-bdc9-94ef7210508e", "metadata": {}, "source": [ "## 20 mbruton/gal_enptsp_mBERT" ] }, { "cell_type": "code", "execution_count": 30, "id": "61034c1e-ab67-4221-9aac-bdd54263f6da", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'r0:root', 'score': 0.69288, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'r1:root', 'score': 0.8305716, 'index': 16, 'word': 'tell', 'start': 54, 'end': 58}, {'entity': 'r2:arg1', 'score': 0.65180844, 'index': 20, 'word': 'story', 'start': 72, 'end': 77}, {'entity': 'r5:root', 'score': 0.4588822, 'index': 44, 'word': 'created', 'start': 180, 'end': 187}, {'entity': 'r6:root', 'score': 0.49542937, 'index': 64, 'word': 'ci', 'start': 264, 'end': 266}, {'entity': 'r6:root', 'score': 0.45653778, 'index': 65, 'word': '##rc', 'start': 266, 'end': 268}, {'entity': 'r6:root', 'score': 0.31713662, 'index': 66, 'word': '##ling', 'start': 268, 'end': 
272}, {'entity': 'r6:arg1', 'score': 0.37775016, 'index': 68, 'word': 'planet', 'start': 277, 'end': 283}, {'entity': 'r6:root', 'score': 0.41948485, 'index': 70, 'word': 'sna', 'start': 285, 'end': 288}, {'entity': 'r6:root', 'score': 0.28788853, 'index': 71, 'word': '##pping', 'start': 288, 'end': 293}, {'entity': 'r6:arg1', 'score': 0.3409012, 'index': 72, 'word': 'photos', 'start': 294, 'end': 300}, {'entity': 'r8:root', 'score': 0.29355028, 'index': 76, 'word': 'spotted', 'start': 310, 'end': 317}, {'entity': 'r8:root', 'score': 0.27520663, 'index': 90, 'word': 'figure', 'start': 370, 'end': 376}, {'entity': 'r8:root', 'score': 0.1575869, 'index': 170, 'word': 'formed', 'start': 693, 'end': 699}, {'entity': 'r8:root', 'score': 0.17861637, 'index': 178, 'word': 'announced', 'start': 720, 'end': 729}, {'entity': 'r8:root', 'score': 0.17304042, 'index': 227, 'word': 'appeared', 'start': 920, 'end': 928}, {'entity': 'r5:root', 'score': 0.31188446, 'index': 253, 'word': 'thought', 'start': 1035, 'end': 1042}, {'entity': 'r5:root', 'score': 0.5253225, 'index': 271, 'word': 'hide', 'start': 1127, 'end': 1131}, {'entity': 'r6:arg1', 'score': 0.34368348, 'index': 272, 'word': 'it', 'start': 1132, 'end': 1134}, {'entity': 'r2:root', 'score': 0.6164163, 'index': 284, 'word': 'wish', 'start': 1181, 'end': 1185}, {'entity': 'r4:root', 'score': 0.31295392, 'index': 296, 'word': 'take', 'start': 1240, 'end': 1244}, {'entity': 'r2:root', 'score': 0.26798648, 'index': 301, 'word': 'make', 'start': 1266, 'end': 1270}, {'entity': 'r8:root', 'score': 0.10455666, 'index': 326, 'word': 'took', 'start': 1359, 'end': 1363}, {'entity': 'r7:arg1', 'score': 0.13289611, 'index': 328, 'word': 'picture', 'start': 1366, 'end': 1373}, {'entity': 'r8:root', 'score': 0.08957504, 'index': 341, 'word': 'reveal', 'start': 1434, 'end': 1440}]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"mbruton/gal_enptsp_mBERT\")\n", 
"model = AutoModelForTokenClassification.from_pretrained(\"mbruton/gal_enptsp_mBERT\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "code", "execution_count": 31, "id": "2430b81e-ba4d-43b3-b644-43f90cb01c1f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "r0:root 1\n", "r1:root 1\n", "r2:arg1 1\n", "r2:root 2\n", "r4:root 1\n", "r5:root 3\n", "r6:arg1 3\n", "r6:root 5\n", "r7:arg1 1\n", "r8:root 7\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "r0:root re 1\n", "r1:root tell 1\n", "r2:arg1 story 1\n", "r2:root make 1\n", " wish 1\n", "r4:root take 1\n", "r5:root created 1\n", " hide 1\n", " thought 1\n", "r6:arg1 it 1\n", " photos 1\n", " planet 1\n", "r6:root ##ling 1\n", " ##pping 1\n", " ##rc 1\n", " ci 1\n", " sna 1\n", "r7:arg1 picture 1\n", "r8:root announced 1\n", " appeared 1\n", " figure 1\n", " formed 1\n", " reveal 1\n", " spotted 1\n", " took 1\n", "dtype: int64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"20 mbrutongal_enptsp_mBERT.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "fddf510a-a810-4cd6-9e03-675e31a660eb", "metadata": {}, "source": [ "## 21 benjamin/wtp-bert-tiny" ] }, { "cell_type": "code", "execution_count": 33, "id": "4e3dff5d-8b2c-4f45-a475-04f7b11e2c41", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'bert-char'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[33], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-tiny\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-bert-tiny\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `bert-char` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-bert-tiny\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-bert-tiny\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "551d370f-6909-47ab-8f71-6ae001b06067", "metadata": {}, "source": [ "## 22 benjamin/wtp-canine-s-1l" ] }, { "cell_type": "code", "execution_count": 35, "id": "3dd00e55-9d25-4119-b4f0-ff1d64666c52", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[35], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-1l\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-1l\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "3f68ad13-321b-443e-8a71-703ee218e1d2", "metadata": {}, "source": [ "## 23 benjamin/wtp-canine-s-6l" ] }, { "cell_type": "code", "execution_count": 37, "id": "ee272836-552e-4335-86b5-6ef64c83294b", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[37], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-6l\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-6l\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "d7d5fa36-7761-47b3-a4d1-110ad004db06", "metadata": {}, "source": [ "## 24 benjamin/wtp-canine-s-9l" ] }, { "cell_type": "code", "execution_count": 39, "id": "1759bc6d-9a6a-4466-8226-3b8508170373", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[39], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-9l\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-9l\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "c2509820-70e1-49ce-959c-74430c8ace62", "metadata": {}, "source": [ "## 25 benjamin/wtp-canine-s-1l-no-adapters" ] }, { "cell_type": "code", "execution_count": 41, "id": "36801845-c8c2-49b4-9157-3f915be02912", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[41], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-1l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-1l-no-adapters\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-1l-no-adapters\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "8dc8cf71-eb95-490f-85f8-81a8a77a7471", "metadata": {}, "source": [ "## 26 benjamin/wtp-canine-s-6l-no-adapters" ] }, { "cell_type": "code", "execution_count": 44, "id": "a077b70f-2cb9-4735-a476-b06831039d23", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[44], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-6l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-6l-no-adapters\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-6l-no-adapters\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "8f962e5f-b928-4ad4-9c77-2f042e8c53d6", "metadata": {}, "source": [ "## 27 benjamin/wtp-canine-s-12l-no-adapters" ] }, { "cell_type": "code", "execution_count": 46, "id": "bce862a8-9ec5-4a82-bf78-d75851e9bf0e", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[46], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m classifier \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." 
] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-12l-no-adapters\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-12l-no-adapters\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "23afb232-2a61-45c6-ae7a-d9bef25fb3d3", "metadata": {}, "source": [ "## 28 Posos/ClinicalNER" ] }, { "cell_type": "code", "execution_count": 48, "id": "c11476c7-d694-49a6-bd2b-7e300e81f23d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"Posos/ClinicalNER\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"Posos/ClinicalNER\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "markdown", "id": "4da2e211-d26e-4b3b-b395-6817fef1adba", "metadata": {}, "source": [ "## 29 numind/NuNER-multilingual-v0.1" ] }, { "cell_type": "code", "execution_count": 50, "id": "777410bc-d134-4439-9f3c-8840fa5a532c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BertForTokenClassification were not initialized from the model checkpoint at numind/NuNER-multilingual-v0.1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[{'entity': 'LABEL_1', 'score': 0.6517681, 'index': 1, 'word': 'So', 'start': 0, 'end': 2}, {'entity': 'LABEL_1', 'score': 0.6665356, 'index': 2, 'word': ',', 'start': 2, 'end': 3}, {'entity': 'LABEL_1', 'score': 0.6989242, 'index': 3, 'word': 'if', 
'start': 4, 'end': 6}, {'entity': 'LABEL_1', 'score': 0.7015521, 'index': 4, 'word': 'you', 'start': 7, 'end': 10}, {'entity': 'LABEL_1', 'score': 0.69064546, 'index': 5, 'word': \"'\", 'start': 10, 'end': 11}, {'entity': 'LABEL_1', 'score': 0.6504881, 'index': 6, 'word': 're', 'start': 11, 'end': 13}, {'entity': 'LABEL_1', 'score': 0.57260615, 'index': 7, 'word': 'a', 'start': 14, 'end': 15}, {'entity': 'LABEL_1', 'score': 0.5917183, 'index': 8, 'word': 'NASA', 'start': 16, 'end': 20}, {'entity': 'LABEL_1', 'score': 0.5965599, 'index': 9, 'word': 'scientist', 'start': 21, 'end': 30}, {'entity': 'LABEL_1', 'score': 0.6810555, 'index': 10, 'word': ',', 'start': 30, 'end': 31}, {'entity': 'LABEL_1', 'score': 0.71639186, 'index': 11, 'word': 'you', 'start': 32, 'end': 35}, {'entity': 'LABEL_1', 'score': 0.70142627, 'index': 12, 'word': 'should', 'start': 36, 'end': 42}, {'entity': 'LABEL_1', 'score': 0.7038532, 'index': 13, 'word': 'be', 'start': 43, 'end': 45}, {'entity': 'LABEL_1', 'score': 0.7008251, 'index': 14, 'word': 'able', 'start': 46, 'end': 50}, {'entity': 'LABEL_1', 'score': 0.66452104, 'index': 15, 'word': 'to', 'start': 51, 'end': 53}, {'entity': 'LABEL_1', 'score': 0.63648593, 'index': 16, 'word': 'tell', 'start': 54, 'end': 58}, {'entity': 'LABEL_1', 'score': 0.6549855, 'index': 17, 'word': 'me', 'start': 59, 'end': 61}, {'entity': 'LABEL_0', 'score': 0.51807356, 'index': 18, 'word': 'the', 'start': 62, 'end': 65}, {'entity': 'LABEL_1', 'score': 0.50653565, 'index': 19, 'word': 'whole', 'start': 66, 'end': 71}, {'entity': 'LABEL_0', 'score': 0.52187973, 'index': 20, 'word': 'story', 'start': 72, 'end': 77}, {'entity': 'LABEL_0', 'score': 0.5346267, 'index': 21, 'word': 'about', 'start': 78, 'end': 83}, {'entity': 'LABEL_1', 'score': 0.55174303, 'index': 22, 'word': 'the', 'start': 84, 'end': 87}, {'entity': 'LABEL_1', 'score': 0.60781723, 'index': 23, 'word': 'Face', 'start': 88, 'end': 92}, {'entity': 'LABEL_1', 'score': 0.5665726, 'index': 24, 
'word': 'On', 'start': 93, 'end': 95}, {'entity': 'LABEL_1', 'score': 0.5692423, 'index': 25, 'word': 'Mars', 'start': 96, 'end': 100}, {'entity': 'LABEL_1', 'score': 0.6011262, 'index': 26, 'word': ',', 'start': 100, 'end': 101}, {'entity': 'LABEL_1', 'score': 0.5368547, 'index': 27, 'word': 'which', 'start': 102, 'end': 107}, {'entity': 'LABEL_1', 'score': 0.5802307, 'index': 28, 'word': 'obvious', 'start': 108, 'end': 115}, {'entity': 'LABEL_1', 'score': 0.517514, 'index': 29, 'word': '##ly', 'start': 115, 'end': 117}, {'entity': 'LABEL_1', 'score': 0.55036914, 'index': 30, 'word': 'is', 'start': 118, 'end': 120}, {'entity': 'LABEL_1', 'score': 0.5529837, 'index': 31, 'word': 'evidence', 'start': 121, 'end': 129}, {'entity': 'LABEL_0', 'score': 0.5724075, 'index': 32, 'word': 'that', 'start': 130, 'end': 134}, {'entity': 'LABEL_0', 'score': 0.64600146, 'index': 33, 'word': 'there', 'start': 135, 'end': 140}, {'entity': 'LABEL_0', 'score': 0.6608648, 'index': 34, 'word': 'is', 'start': 141, 'end': 143}, {'entity': 'LABEL_0', 'score': 0.6958021, 'index': 35, 'word': 'life', 'start': 144, 'end': 148}, {'entity': 'LABEL_0', 'score': 0.62779915, 'index': 36, 'word': 'on', 'start': 149, 'end': 151}, {'entity': 'LABEL_0', 'score': 0.5481105, 'index': 37, 'word': 'Mars', 'start': 152, 'end': 156}, {'entity': 'LABEL_1', 'score': 0.56165004, 'index': 38, 'word': ',', 'start': 156, 'end': 157}, {'entity': 'LABEL_1', 'score': 0.551388, 'index': 39, 'word': 'and', 'start': 158, 'end': 161}, {'entity': 'LABEL_1', 'score': 0.5514617, 'index': 40, 'word': 'that', 'start': 162, 'end': 166}, {'entity': 'LABEL_1', 'score': 0.53406215, 'index': 41, 'word': 'the', 'start': 167, 'end': 170}, {'entity': 'LABEL_1', 'score': 0.6221385, 'index': 42, 'word': 'face', 'start': 171, 'end': 175}, {'entity': 'LABEL_1', 'score': 0.6061021, 'index': 43, 'word': 'was', 'start': 176, 'end': 179}, {'entity': 'LABEL_1', 'score': 0.55775326, 'index': 44, 'word': 'created', 'start': 180, 'end': 187}, 
{'entity': 'LABEL_0', 'score': 0.5654199, 'index': 45, 'word': 'by', 'start': 188, 'end': 190}, {'entity': 'LABEL_0', 'score': 0.50058967, 'index': 46, 'word': 'alien', 'start': 191, 'end': 196}, {'entity': 'LABEL_0', 'score': 0.5248566, 'index': 47, 'word': '##s', 'start': 196, 'end': 197}, {'entity': 'LABEL_1', 'score': 0.5247892, 'index': 48, 'word': ',', 'start': 197, 'end': 198}, {'entity': 'LABEL_1', 'score': 0.5430314, 'index': 49, 'word': 'correct', 'start': 199, 'end': 206}, {'entity': 'LABEL_1', 'score': 0.61337465, 'index': 50, 'word': '?', 'start': 206, 'end': 207}, {'entity': 'LABEL_1', 'score': 0.64854234, 'index': 51, 'word': '\"', 'start': 207, 'end': 208}, {'entity': 'LABEL_1', 'score': 0.6051055, 'index': 52, 'word': 'No', 'start': 209, 'end': 211}, {'entity': 'LABEL_1', 'score': 0.58601683, 'index': 53, 'word': ',', 'start': 211, 'end': 212}, {'entity': 'LABEL_0', 'score': 0.6085843, 'index': 54, 'word': 'twenty', 'start': 213, 'end': 219}, {'entity': 'LABEL_0', 'score': 0.6648703, 'index': 55, 'word': 'five', 'start': 220, 'end': 224}, {'entity': 'LABEL_0', 'score': 0.650303, 'index': 56, 'word': 'years', 'start': 225, 'end': 230}, {'entity': 'LABEL_0', 'score': 0.6452194, 'index': 57, 'word': 'ago', 'start': 231, 'end': 234}, {'entity': 'LABEL_1', 'score': 0.50657856, 'index': 58, 'word': ',', 'start': 234, 'end': 235}, {'entity': 'LABEL_1', 'score': 0.50954217, 'index': 59, 'word': 'our', 'start': 236, 'end': 239}, {'entity': 'LABEL_1', 'score': 0.5194712, 'index': 60, 'word': 'Viking', 'start': 240, 'end': 246}, {'entity': 'LABEL_0', 'score': 0.5189833, 'index': 61, 'word': '1', 'start': 247, 'end': 248}, {'entity': 'LABEL_0', 'score': 0.5036279, 'index': 62, 'word': 'spacecraft', 'start': 249, 'end': 259}, {'entity': 'LABEL_0', 'score': 0.5388934, 'index': 63, 'word': 'was', 'start': 260, 'end': 263}, {'entity': 'LABEL_0', 'score': 0.5811445, 'index': 64, 'word': 'ci', 'start': 264, 'end': 266}, {'entity': 'LABEL_0', 'score': 0.60810935, 
'index': 65, 'word': '##rc', 'start': 266, 'end': 268}, {'entity': 'LABEL_0', 'score': 0.63988644, 'index': 66, 'word': '##ling', 'start': 268, 'end': 272}, {'entity': 'LABEL_0', 'score': 0.68350035, 'index': 67, 'word': 'the', 'start': 273, 'end': 276}, {'entity': 'LABEL_0', 'score': 0.5718535, 'index': 68, 'word': 'planet', 'start': 277, 'end': 283}, {'entity': 'LABEL_0', 'score': 0.5150864, 'index': 69, 'word': ',', 'start': 283, 'end': 284}, {'entity': 'LABEL_1', 'score': 0.5919597, 'index': 70, 'word': 'sna', 'start': 285, 'end': 288}, {'entity': 'LABEL_1', 'score': 0.5371689, 'index': 71, 'word': '##pping', 'start': 288, 'end': 293}, {'entity': 'LABEL_1', 'score': 0.5501098, 'index': 72, 'word': 'photos', 'start': 294, 'end': 300}, {'entity': 'LABEL_1', 'score': 0.58064604, 'index': 73, 'word': ',', 'start': 300, 'end': 301}, {'entity': 'LABEL_1', 'score': 0.602519, 'index': 74, 'word': 'when', 'start': 302, 'end': 306}, {'entity': 'LABEL_1', 'score': 0.53923917, 'index': 75, 'word': 'it', 'start': 307, 'end': 309}, {'entity': 'LABEL_1', 'score': 0.52948976, 'index': 76, 'word': 'spotted', 'start': 310, 'end': 317}, {'entity': 'LABEL_1', 'score': 0.53173506, 'index': 77, 'word': 'the', 'start': 318, 'end': 321}, {'entity': 'LABEL_1', 'score': 0.5621857, 'index': 78, 'word': 'sh', 'start': 322, 'end': 324}, {'entity': 'LABEL_1', 'score': 0.55711824, 'index': 79, 'word': '##adow', 'start': 324, 'end': 328}, {'entity': 'LABEL_1', 'score': 0.5415988, 'index': 80, 'word': '##y', 'start': 328, 'end': 329}, {'entity': 'LABEL_1', 'score': 0.50422186, 'index': 81, 'word': 'like', 'start': 330, 'end': 334}, {'entity': 'LABEL_1', 'score': 0.50749224, 'index': 82, 'word': '##ness', 'start': 334, 'end': 338}, {'entity': 'LABEL_0', 'score': 0.50134593, 'index': 83, 'word': 'of', 'start': 339, 'end': 341}, {'entity': 'LABEL_1', 'score': 0.5051905, 'index': 84, 'word': 'a', 'start': 342, 'end': 343}, {'entity': 'LABEL_1', 'score': 0.5539653, 'index': 85, 'word': 'human', 
'start': 344, 'end': 349}, {'entity': 'LABEL_1', 'score': 0.60916597, 'index': 86, 'word': 'face', 'start': 350, 'end': 354}, {'entity': 'LABEL_1', 'score': 0.6022859, 'index': 87, 'word': '.', 'start': 354, 'end': 355}, {'entity': 'LABEL_1', 'score': 0.61335, 'index': 88, 'word': 'Us', 'start': 356, 'end': 358}, {'entity': 'LABEL_1', 'score': 0.56481904, 'index': 89, 'word': 'scientists', 'start': 359, 'end': 369}, {'entity': 'LABEL_1', 'score': 0.643385, 'index': 90, 'word': 'figure', 'start': 370, 'end': 376}, {'entity': 'LABEL_1', 'score': 0.60697997, 'index': 91, 'word': '##d', 'start': 376, 'end': 377}, {'entity': 'LABEL_1', 'score': 0.558008, 'index': 92, 'word': 'out', 'start': 378, 'end': 381}, {'entity': 'LABEL_1', 'score': 0.5639139, 'index': 93, 'word': 'that', 'start': 382, 'end': 386}, {'entity': 'LABEL_1', 'score': 0.5370862, 'index': 94, 'word': 'it', 'start': 387, 'end': 389}, {'entity': 'LABEL_1', 'score': 0.58077925, 'index': 95, 'word': 'was', 'start': 390, 'end': 393}, {'entity': 'LABEL_1', 'score': 0.51595956, 'index': 96, 'word': 'just', 'start': 394, 'end': 398}, {'entity': 'LABEL_1', 'score': 0.51781327, 'index': 97, 'word': 'another', 'start': 399, 'end': 406}, {'entity': 'LABEL_0', 'score': 0.53742176, 'index': 98, 'word': 'Mart', 'start': 407, 'end': 411}, {'entity': 'LABEL_0', 'score': 0.54195774, 'index': 99, 'word': '##ian', 'start': 411, 'end': 414}, {'entity': 'LABEL_0', 'score': 0.5194467, 'index': 100, 'word': 'mesa', 'start': 415, 'end': 419}, {'entity': 'LABEL_1', 'score': 0.5056166, 'index': 101, 'word': ',', 'start': 419, 'end': 420}, {'entity': 'LABEL_0', 'score': 0.5286414, 'index': 102, 'word': 'common', 'start': 421, 'end': 427}, {'entity': 'LABEL_0', 'score': 0.59091985, 'index': 103, 'word': 'around', 'start': 428, 'end': 434}, {'entity': 'LABEL_0', 'score': 0.5216974, 'index': 104, 'word': 'C', 'start': 435, 'end': 436}, {'entity': 'LABEL_0', 'score': 0.50349146, 'index': 105, 'word': '##yd', 'start': 436, 'end': 438}, 
{'entity': 'LABEL_0', 'score': 0.53423715, 'index': 106, 'word': '##onia', 'start': 438, 'end': 442}, {'entity': 'LABEL_1', 'score': 0.5149874, 'index': 107, 'word': ',', 'start': 442, 'end': 443}, {'entity': 'LABEL_1', 'score': 0.550047, 'index': 108, 'word': 'only', 'start': 444, 'end': 448}, {'entity': 'LABEL_1', 'score': 0.54703, 'index': 109, 'word': 'this', 'start': 449, 'end': 453}, {'entity': 'LABEL_1', 'score': 0.5274473, 'index': 110, 'word': 'one', 'start': 454, 'end': 457}, {'entity': 'LABEL_1', 'score': 0.60423684, 'index': 111, 'word': 'had', 'start': 458, 'end': 461}, {'entity': 'LABEL_1', 'score': 0.5206097, 'index': 112, 'word': 'sh', 'start': 462, 'end': 464}, {'entity': 'LABEL_1', 'score': 0.5060773, 'index': 113, 'word': '##adow', 'start': 464, 'end': 468}, {'entity': 'LABEL_1', 'score': 0.51237464, 'index': 114, 'word': '##s', 'start': 468, 'end': 469}, {'entity': 'LABEL_1', 'score': 0.5460441, 'index': 115, 'word': 'that', 'start': 470, 'end': 474}, {'entity': 'LABEL_1', 'score': 0.5336468, 'index': 116, 'word': 'made', 'start': 475, 'end': 479}, {'entity': 'LABEL_0', 'score': 0.5088002, 'index': 117, 'word': 'it', 'start': 480, 'end': 482}, {'entity': 'LABEL_0', 'score': 0.5061207, 'index': 118, 'word': 'look', 'start': 483, 'end': 487}, {'entity': 'LABEL_0', 'score': 0.6073445, 'index': 119, 'word': 'like', 'start': 488, 'end': 492}, {'entity': 'LABEL_0', 'score': 0.6060573, 'index': 120, 'word': 'an', 'start': 493, 'end': 495}, {'entity': 'LABEL_0', 'score': 0.5422901, 'index': 121, 'word': 'Egypt', 'start': 496, 'end': 501}, {'entity': 'LABEL_0', 'score': 0.5818128, 'index': 122, 'word': '##ion', 'start': 501, 'end': 504}, {'entity': 'LABEL_0', 'score': 0.57624054, 'index': 123, 'word': 'Ph', 'start': 505, 'end': 507}, {'entity': 'LABEL_0', 'score': 0.55589443, 'index': 124, 'word': '##ara', 'start': 507, 'end': 510}, {'entity': 'LABEL_0', 'score': 0.5907527, 'index': 125, 'word': '##oh', 'start': 510, 'end': 512}, {'entity': 'LABEL_0', 
'score': 0.67880136, 'index': 126, 'word': '.', 'start': 512, 'end': 513}, {'entity': 'LABEL_0', 'score': 0.55919296, 'index': 127, 'word': 'Very', 'start': 514, 'end': 518}, {'entity': 'LABEL_0', 'score': 0.6202078, 'index': 128, 'word': 'few', 'start': 519, 'end': 522}, {'entity': 'LABEL_0', 'score': 0.65795386, 'index': 129, 'word': 'days', 'start': 523, 'end': 527}, {'entity': 'LABEL_0', 'score': 0.67949957, 'index': 130, 'word': 'later', 'start': 528, 'end': 533}, {'entity': 'LABEL_1', 'score': 0.5850206, 'index': 131, 'word': ',', 'start': 533, 'end': 534}, {'entity': 'LABEL_1', 'score': 0.6831487, 'index': 132, 'word': 'we', 'start': 535, 'end': 537}, {'entity': 'LABEL_1', 'score': 0.554138, 'index': 133, 'word': 'revealed', 'start': 538, 'end': 546}, {'entity': 'LABEL_0', 'score': 0.50729936, 'index': 134, 'word': 'the', 'start': 547, 'end': 550}, {'entity': 'LABEL_1', 'score': 0.5196401, 'index': 135, 'word': 'image', 'start': 551, 'end': 556}, {'entity': 'LABEL_1', 'score': 0.6263674, 'index': 136, 'word': 'for', 'start': 557, 'end': 560}, {'entity': 'LABEL_1', 'score': 0.6722497, 'index': 137, 'word': 'all', 'start': 561, 'end': 564}, {'entity': 'LABEL_1', 'score': 0.6646968, 'index': 138, 'word': 'to', 'start': 565, 'end': 567}, {'entity': 'LABEL_1', 'score': 0.55594933, 'index': 139, 'word': 'see', 'start': 568, 'end': 571}, {'entity': 'LABEL_1', 'score': 0.6437286, 'index': 140, 'word': ',', 'start': 571, 'end': 572}, {'entity': 'LABEL_1', 'score': 0.6288408, 'index': 141, 'word': 'and', 'start': 573, 'end': 576}, {'entity': 'LABEL_1', 'score': 0.67429084, 'index': 142, 'word': 'we', 'start': 577, 'end': 579}, {'entity': 'LABEL_1', 'score': 0.5014696, 'index': 143, 'word': 'made', 'start': 580, 'end': 584}, {'entity': 'LABEL_0', 'score': 0.54451215, 'index': 144, 'word': 'sure', 'start': 585, 'end': 589}, {'entity': 'LABEL_0', 'score': 0.5620309, 'index': 145, 'word': 'to', 'start': 590, 'end': 592}, {'entity': 'LABEL_0', 'score': 0.5541243, 'index': 
146, 'word': 'note', 'start': 593, 'end': 597}, {'entity': 'LABEL_1', 'score': 0.5254825, 'index': 147, 'word': 'that', 'start': 598, 'end': 602}, {'entity': 'LABEL_1', 'score': 0.5633732, 'index': 148, 'word': 'it', 'start': 603, 'end': 605}, {'entity': 'LABEL_1', 'score': 0.5730712, 'index': 149, 'word': 'was', 'start': 606, 'end': 609}, {'entity': 'LABEL_0', 'score': 0.5550324, 'index': 150, 'word': 'a', 'start': 610, 'end': 611}, {'entity': 'LABEL_0', 'score': 0.5353202, 'index': 151, 'word': 'huge', 'start': 612, 'end': 616}, {'entity': 'LABEL_0', 'score': 0.5435573, 'index': 152, 'word': 'rock', 'start': 617, 'end': 621}, {'entity': 'LABEL_0', 'score': 0.54164654, 'index': 153, 'word': 'formation', 'start': 622, 'end': 631}, {'entity': 'LABEL_0', 'score': 0.5117593, 'index': 154, 'word': 'that', 'start': 632, 'end': 636}, {'entity': 'LABEL_1', 'score': 0.51742077, 'index': 155, 'word': 'just', 'start': 637, 'end': 641}, {'entity': 'LABEL_1', 'score': 0.5446302, 'index': 156, 'word': 'res', 'start': 642, 'end': 645}, {'entity': 'LABEL_1', 'score': 0.50045645, 'index': 157, 'word': '##emble', 'start': 645, 'end': 650}, {'entity': 'LABEL_1', 'score': 0.5215423, 'index': 158, 'word': '##d', 'start': 650, 'end': 651}, {'entity': 'LABEL_0', 'score': 0.55436605, 'index': 159, 'word': 'a', 'start': 652, 'end': 653}, {'entity': 'LABEL_1', 'score': 0.5265072, 'index': 160, 'word': 'human', 'start': 654, 'end': 659}, {'entity': 'LABEL_1', 'score': 0.55062467, 'index': 161, 'word': 'head', 'start': 660, 'end': 664}, {'entity': 'LABEL_1', 'score': 0.5384184, 'index': 162, 'word': 'and', 'start': 665, 'end': 668}, {'entity': 'LABEL_1', 'score': 0.5848135, 'index': 163, 'word': 'face', 'start': 669, 'end': 673}, {'entity': 'LABEL_1', 'score': 0.57094294, 'index': 164, 'word': ',', 'start': 673, 'end': 674}, {'entity': 'LABEL_1', 'score': 0.5754807, 'index': 165, 'word': 'but', 'start': 675, 'end': 678}, {'entity': 'LABEL_1', 'score': 0.5758484, 'index': 166, 'word': 'all', 
'start': 679, 'end': 682}, {'entity': 'LABEL_1', 'score': 0.54321927, 'index': 167, 'word': 'of', 'start': 683, 'end': 685}, {'entity': 'LABEL_1', 'score': 0.5251982, 'index': 168, 'word': 'it', 'start': 686, 'end': 688}, {'entity': 'LABEL_1', 'score': 0.62576276, 'index': 169, 'word': 'was', 'start': 689, 'end': 692}, {'entity': 'LABEL_1', 'score': 0.5525769, 'index': 170, 'word': 'formed', 'start': 693, 'end': 699}, {'entity': 'LABEL_1', 'score': 0.5072356, 'index': 171, 'word': 'by', 'start': 700, 'end': 702}, {'entity': 'LABEL_1', 'score': 0.54959744, 'index': 172, 'word': 'sh', 'start': 703, 'end': 705}, {'entity': 'LABEL_1', 'score': 0.5374098, 'index': 173, 'word': '##adow', 'start': 705, 'end': 709}, {'entity': 'LABEL_1', 'score': 0.55133754, 'index': 174, 'word': '##s', 'start': 709, 'end': 710}, {'entity': 'LABEL_0', 'score': 0.692533, 'index': 175, 'word': '.', 'start': 710, 'end': 711}, {'entity': 'LABEL_1', 'score': 0.68988186, 'index': 176, 'word': 'We', 'start': 712, 'end': 714}, {'entity': 'LABEL_1', 'score': 0.58056563, 'index': 177, 'word': 'only', 'start': 715, 'end': 719}, {'entity': 'LABEL_1', 'score': 0.52151, 'index': 178, 'word': 'announced', 'start': 720, 'end': 729}, {'entity': 'LABEL_0', 'score': 0.5221571, 'index': 179, 'word': 'it', 'start': 730, 'end': 732}, {'entity': 'LABEL_1', 'score': 0.5608035, 'index': 180, 'word': 'because', 'start': 733, 'end': 740}, {'entity': 'LABEL_1', 'score': 0.6509734, 'index': 181, 'word': 'we', 'start': 741, 'end': 743}, {'entity': 'LABEL_1', 'score': 0.58938694, 'index': 182, 'word': 'thought', 'start': 744, 'end': 751}, {'entity': 'LABEL_1', 'score': 0.5525905, 'index': 183, 'word': 'it', 'start': 752, 'end': 754}, {'entity': 'LABEL_1', 'score': 0.5674345, 'index': 184, 'word': 'would', 'start': 755, 'end': 760}, {'entity': 'LABEL_1', 'score': 0.5639775, 'index': 185, 'word': 'be', 'start': 761, 'end': 763}, {'entity': 'LABEL_1', 'score': 0.501266, 'index': 186, 'word': 'a', 'start': 764, 'end': 765}, 
{'entity': 'LABEL_0', 'score': 0.50765914, 'index': 187, 'word': 'good', 'start': 766, 'end': 770}, {'entity': 'LABEL_0', 'score': 0.52330655, 'index': 188, 'word': 'way', 'start': 771, 'end': 774}, {'entity': 'LABEL_1', 'score': 0.57470995, 'index': 189, 'word': 'to', 'start': 775, 'end': 777}, {'entity': 'LABEL_1', 'score': 0.61295885, 'index': 190, 'word': 'engage', 'start': 778, 'end': 784}, {'entity': 'LABEL_0', 'score': 0.5334338, 'index': 191, 'word': 'the', 'start': 785, 'end': 788}, {'entity': 'LABEL_0', 'score': 0.51933604, 'index': 192, 'word': 'public', 'start': 789, 'end': 795}, {'entity': 'LABEL_1', 'score': 0.5949142, 'index': 193, 'word': 'with', 'start': 796, 'end': 800}, {'entity': 'LABEL_1', 'score': 0.6003629, 'index': 194, 'word': 'NASA', 'start': 801, 'end': 805}, {'entity': 'LABEL_1', 'score': 0.5744194, 'index': 195, 'word': \"'\", 'start': 805, 'end': 806}, {'entity': 'LABEL_1', 'score': 0.57181925, 'index': 196, 'word': 's', 'start': 806, 'end': 807}, {'entity': 'LABEL_1', 'score': 0.53557616, 'index': 197, 'word': 'findings', 'start': 808, 'end': 816}, {'entity': 'LABEL_1', 'score': 0.6095189, 'index': 198, 'word': ',', 'start': 816, 'end': 817}, {'entity': 'LABEL_1', 'score': 0.6437797, 'index': 199, 'word': 'and', 'start': 818, 'end': 821}, {'entity': 'LABEL_1', 'score': 0.55779886, 'index': 200, 'word': 'at', 'start': 822, 'end': 824}, {'entity': 'LABEL_1', 'score': 0.52227074, 'index': 201, 'word': '##rra', 'start': 824, 'end': 827}, {'entity': 'LABEL_1', 'score': 0.5302549, 'index': 202, 'word': '##ct', 'start': 827, 'end': 829}, {'entity': 'LABEL_0', 'score': 0.50743705, 'index': 203, 'word': 'attention', 'start': 830, 'end': 839}, {'entity': 'LABEL_0', 'score': 0.54697585, 'index': 204, 'word': 'to', 'start': 840, 'end': 842}, {'entity': 'LABEL_0', 'score': 0.51355886, 'index': 205, 'word': 'Mars', 'start': 843, 'end': 847}, {'entity': 'LABEL_1', 'score': 0.5464499, 'index': 206, 'word': '-', 'start': 847, 'end': 848}, {'entity': 
'LABEL_1', 'score': 0.59227043, 'index': 207, 'word': '-', 'start': 848, 'end': 849}, {'entity': 'LABEL_1', 'score': 0.594104, 'index': 208, 'word': 'and', 'start': 850, 'end': 853}, {'entity': 'LABEL_1', 'score': 0.5491132, 'index': 209, 'word': 'it', 'start': 854, 'end': 856}, {'entity': 'LABEL_1', 'score': 0.5387794, 'index': 210, 'word': 'did', 'start': 857, 'end': 860}, {'entity': 'LABEL_0', 'score': 0.6830391, 'index': 211, 'word': '.', 'start': 860, 'end': 861}, {'entity': 'LABEL_1', 'score': 0.5619964, 'index': 212, 'word': 'The', 'start': 863, 'end': 866}, {'entity': 'LABEL_1', 'score': 0.6061772, 'index': 213, 'word': 'face', 'start': 867, 'end': 871}, {'entity': 'LABEL_1', 'score': 0.5403782, 'index': 214, 'word': 'on', 'start': 872, 'end': 874}, {'entity': 'LABEL_1', 'score': 0.553967, 'index': 215, 'word': 'Mars', 'start': 875, 'end': 879}, {'entity': 'LABEL_1', 'score': 0.5517386, 'index': 216, 'word': 'soon', 'start': 880, 'end': 884}, {'entity': 'LABEL_1', 'score': 0.5017604, 'index': 217, 'word': 'became', 'start': 885, 'end': 891}, {'entity': 'LABEL_0', 'score': 0.6197886, 'index': 218, 'word': 'a', 'start': 892, 'end': 893}, {'entity': 'LABEL_0', 'score': 0.5807936, 'index': 219, 'word': 'pop', 'start': 894, 'end': 897}, {'entity': 'LABEL_0', 'score': 0.60519373, 'index': 220, 'word': 'i', 'start': 898, 'end': 899}, {'entity': 'LABEL_0', 'score': 0.63208836, 'index': 221, 'word': '##con', 'start': 899, 'end': 902}, {'entity': 'LABEL_1', 'score': 0.69506586, 'index': 222, 'word': ';', 'start': 902, 'end': 903}, {'entity': 'LABEL_1', 'score': 0.5886576, 'index': 223, 'word': 'shot', 'start': 904, 'end': 908}, {'entity': 'LABEL_1', 'score': 0.5002726, 'index': 224, 'word': 'in', 'start': 909, 'end': 911}, {'entity': 'LABEL_1', 'score': 0.557791, 'index': 225, 'word': 'movies', 'start': 912, 'end': 918}, {'entity': 'LABEL_1', 'score': 0.6290412, 'index': 226, 'word': ',', 'start': 918, 'end': 919}, {'entity': 'LABEL_1', 'score': 0.52317363, 'index': 
227, 'word': 'appeared', 'start': 920, 'end': 928}, {'entity': 'LABEL_0', 'score': 0.5399968, 'index': 228, 'word': 'in', 'start': 929, 'end': 931}, {'entity': 'LABEL_0', 'score': 0.5414876, 'index': 229, 'word': 'books', 'start': 932, 'end': 937}, {'entity': 'LABEL_0', 'score': 0.5429459, 'index': 230, 'word': ',', 'start': 937, 'end': 938}, {'entity': 'LABEL_0', 'score': 0.5682568, 'index': 231, 'word': 'magazines', 'start': 939, 'end': 948}, {'entity': 'LABEL_0', 'score': 0.5222259, 'index': 232, 'word': ',', 'start': 948, 'end': 949}, {'entity': 'LABEL_0', 'score': 0.5639273, 'index': 233, 'word': 'radio', 'start': 950, 'end': 955}, {'entity': 'LABEL_0', 'score': 0.56908894, 'index': 234, 'word': 'talk', 'start': 956, 'end': 960}, {'entity': 'LABEL_0', 'score': 0.5995537, 'index': 235, 'word': 'shows', 'start': 961, 'end': 966}, {'entity': 'LABEL_1', 'score': 0.55398846, 'index': 236, 'word': ',', 'start': 966, 'end': 967}, {'entity': 'LABEL_1', 'score': 0.53322935, 'index': 237, 'word': 'and', 'start': 968, 'end': 971}, {'entity': 'LABEL_1', 'score': 0.5473446, 'index': 238, 'word': 'hau', 'start': 972, 'end': 975}, {'entity': 'LABEL_1', 'score': 0.5041182, 'index': 239, 'word': '##nted', 'start': 975, 'end': 979}, {'entity': 'LABEL_0', 'score': 0.55119586, 'index': 240, 'word': 'gr', 'start': 980, 'end': 982}, {'entity': 'LABEL_0', 'score': 0.56132877, 'index': 241, 'word': '##oce', 'start': 982, 'end': 985}, {'entity': 'LABEL_0', 'score': 0.55828726, 'index': 242, 'word': '##ry', 'start': 985, 'end': 987}, {'entity': 'LABEL_0', 'score': 0.561964, 'index': 243, 'word': 'store', 'start': 988, 'end': 993}, {'entity': 'LABEL_0', 'score': 0.54090506, 'index': 244, 'word': 'check', 'start': 994, 'end': 999}, {'entity': 'LABEL_0', 'score': 0.54825944, 'index': 245, 'word': '##out', 'start': 999, 'end': 1002}, {'entity': 'LABEL_0', 'score': 0.55701, 'index': 246, 'word': 'lines', 'start': 1003, 'end': 1008}, {'entity': 'LABEL_0', 'score': 0.5457833, 'index': 247, 
'word': 'for', 'start': 1009, 'end': 1012}, {'entity': 'LABEL_0', 'score': 0.6821632, 'index': 248, 'word': '25', 'start': 1013, 'end': 1015}, {'entity': 'LABEL_0', 'score': 0.660666, 'index': 249, 'word': 'years', 'start': 1016, 'end': 1021}, {'entity': 'LABEL_0', 'score': 0.68063366, 'index': 250, 'word': '.', 'start': 1021, 'end': 1022}, {'entity': 'LABEL_1', 'score': 0.5878949, 'index': 251, 'word': 'Some', 'start': 1023, 'end': 1027}, {'entity': 'LABEL_1', 'score': 0.56123006, 'index': 252, 'word': 'people', 'start': 1028, 'end': 1034}, {'entity': 'LABEL_1', 'score': 0.6098995, 'index': 253, 'word': 'thought', 'start': 1035, 'end': 1042}, {'entity': 'LABEL_0', 'score': 0.5661758, 'index': 254, 'word': 'the', 'start': 1043, 'end': 1046}, {'entity': 'LABEL_0', 'score': 0.5386031, 'index': 255, 'word': 'natural', 'start': 1047, 'end': 1054}, {'entity': 'LABEL_0', 'score': 0.52325475, 'index': 256, 'word': 'land', 'start': 1055, 'end': 1059}, {'entity': 'LABEL_0', 'score': 0.5261488, 'index': 257, 'word': '##form', 'start': 1059, 'end': 1063}, {'entity': 'LABEL_1', 'score': 0.5174967, 'index': 258, 'word': 'was', 'start': 1064, 'end': 1067}, {'entity': 'LABEL_0', 'score': 0.5168788, 'index': 259, 'word': 'evidence', 'start': 1068, 'end': 1076}, {'entity': 'LABEL_0', 'score': 0.6249421, 'index': 260, 'word': 'of', 'start': 1077, 'end': 1079}, {'entity': 'LABEL_0', 'score': 0.70964944, 'index': 261, 'word': 'life', 'start': 1080, 'end': 1084}, {'entity': 'LABEL_0', 'score': 0.685455, 'index': 262, 'word': 'on', 'start': 1085, 'end': 1087}, {'entity': 'LABEL_0', 'score': 0.5668801, 'index': 263, 'word': 'Mars', 'start': 1088, 'end': 1092}, {'entity': 'LABEL_0', 'score': 0.5259334, 'index': 264, 'word': ',', 'start': 1092, 'end': 1093}, {'entity': 'LABEL_1', 'score': 0.60075504, 'index': 265, 'word': 'and', 'start': 1094, 'end': 1097}, {'entity': 'LABEL_1', 'score': 0.6089564, 'index': 266, 'word': 'that', 'start': 1098, 'end': 1102}, {'entity': 'LABEL_1', 'score': 
0.5566934, 'index': 267, 'word': 'us', 'start': 1103, 'end': 1105}, {'entity': 'LABEL_1', 'score': 0.565986, 'index': 268, 'word': 'scientists', 'start': 1106, 'end': 1116}, {'entity': 'LABEL_1', 'score': 0.63284963, 'index': 269, 'word': 'wanted', 'start': 1117, 'end': 1123}, {'entity': 'LABEL_1', 'score': 0.5852543, 'index': 270, 'word': 'to', 'start': 1124, 'end': 1126}, {'entity': 'LABEL_1', 'score': 0.58516186, 'index': 271, 'word': 'hide', 'start': 1127, 'end': 1131}, {'entity': 'LABEL_1', 'score': 0.5035104, 'index': 272, 'word': 'it', 'start': 1132, 'end': 1134}, {'entity': 'LABEL_1', 'score': 0.55418307, 'index': 273, 'word': ',', 'start': 1134, 'end': 1135}, {'entity': 'LABEL_1', 'score': 0.632634, 'index': 274, 'word': 'but', 'start': 1136, 'end': 1139}, {'entity': 'LABEL_1', 'score': 0.64646935, 'index': 275, 'word': 'really', 'start': 1140, 'end': 1146}, {'entity': 'LABEL_1', 'score': 0.60271597, 'index': 276, 'word': ',', 'start': 1146, 'end': 1147}, {'entity': 'LABEL_1', 'score': 0.55116373, 'index': 277, 'word': 'the', 'start': 1148, 'end': 1151}, {'entity': 'LABEL_1', 'score': 0.5534294, 'index': 278, 'word': 'defender', 'start': 1152, 'end': 1160}, {'entity': 'LABEL_0', 'score': 0.51253384, 'index': 279, 'word': '##s', 'start': 1160, 'end': 1161}, {'entity': 'LABEL_0', 'score': 0.51277995, 'index': 280, 'word': 'of', 'start': 1162, 'end': 1164}, {'entity': 'LABEL_0', 'score': 0.50931233, 'index': 281, 'word': 'the', 'start': 1165, 'end': 1168}, {'entity': 'LABEL_1', 'score': 0.5213263, 'index': 282, 'word': 'NASA', 'start': 1169, 'end': 1173}, {'entity': 'LABEL_1', 'score': 0.52774245, 'index': 283, 'word': 'budget', 'start': 1174, 'end': 1180}, {'entity': 'LABEL_1', 'score': 0.6431934, 'index': 284, 'word': 'wish', 'start': 1181, 'end': 1185}, {'entity': 'LABEL_0', 'score': 0.6439814, 'index': 285, 'word': 'there', 'start': 1186, 'end': 1191}, {'entity': 'LABEL_0', 'score': 0.6172206, 'index': 286, 'word': 'was', 'start': 1192, 'end': 1195}, 
{'entity': 'LABEL_0', 'score': 0.6588487, 'index': 287, 'word': 'ancient', 'start': 1196, 'end': 1203}, {'entity': 'LABEL_0', 'score': 0.66718197, 'index': 288, 'word': 'civili', 'start': 1204, 'end': 1210}, {'entity': 'LABEL_0', 'score': 0.67449886, 'index': 289, 'word': '##zation', 'start': 1210, 'end': 1216}, {'entity': 'LABEL_0', 'score': 0.6369945, 'index': 290, 'word': 'on', 'start': 1217, 'end': 1219}, {'entity': 'LABEL_0', 'score': 0.54807943, 'index': 291, 'word': 'Mars', 'start': 1220, 'end': 1224}, {'entity': 'LABEL_0', 'score': 0.6829598, 'index': 292, 'word': '.', 'start': 1224, 'end': 1225}, {'entity': 'LABEL_1', 'score': 0.69923604, 'index': 293, 'word': 'We', 'start': 1226, 'end': 1228}, {'entity': 'LABEL_1', 'score': 0.5304869, 'index': 294, 'word': 'decided', 'start': 1229, 'end': 1236}, {'entity': 'LABEL_0', 'score': 0.50105697, 'index': 295, 'word': 'to', 'start': 1237, 'end': 1239}, {'entity': 'LABEL_1', 'score': 0.52295804, 'index': 296, 'word': 'take', 'start': 1240, 'end': 1244}, {'entity': 'LABEL_0', 'score': 0.50881416, 'index': 297, 'word': 'another', 'start': 1245, 'end': 1252}, {'entity': 'LABEL_1', 'score': 0.51416457, 'index': 298, 'word': 'shot', 'start': 1253, 'end': 1257}, {'entity': 'LABEL_1', 'score': 0.5382968, 'index': 299, 'word': 'just', 'start': 1258, 'end': 1262}, {'entity': 'LABEL_1', 'score': 0.56296045, 'index': 300, 'word': 'to', 'start': 1263, 'end': 1265}, {'entity': 'LABEL_1', 'score': 0.59076697, 'index': 301, 'word': 'make', 'start': 1266, 'end': 1270}, {'entity': 'LABEL_1', 'score': 0.5739148, 'index': 302, 'word': 'sure', 'start': 1271, 'end': 1275}, {'entity': 'LABEL_1', 'score': 0.6504669, 'index': 303, 'word': 'we', 'start': 1276, 'end': 1278}, {'entity': 'LABEL_1', 'score': 0.6707218, 'index': 304, 'word': 'were', 'start': 1279, 'end': 1283}, {'entity': 'LABEL_1', 'score': 0.5913361, 'index': 305, 'word': '##n', 'start': 1283, 'end': 1284}, {'entity': 'LABEL_1', 'score': 0.62763625, 'index': 306, 'word': 
\"'\", 'start': 1284, 'end': 1285}, {'entity': 'LABEL_1', 'score': 0.6344838, 'index': 307, 'word': 't', 'start': 1285, 'end': 1286}, {'entity': 'LABEL_1', 'score': 0.65749675, 'index': 308, 'word': 'wrong', 'start': 1287, 'end': 1292}, {'entity': 'LABEL_1', 'score': 0.59669024, 'index': 309, 'word': ',', 'start': 1292, 'end': 1293}, {'entity': 'LABEL_1', 'score': 0.5044974, 'index': 310, 'word': 'on', 'start': 1294, 'end': 1296}, {'entity': 'LABEL_0', 'score': 0.5337205, 'index': 311, 'word': 'April', 'start': 1297, 'end': 1302}, {'entity': 'LABEL_0', 'score': 0.57182956, 'index': 312, 'word': '5', 'start': 1303, 'end': 1304}, {'entity': 'LABEL_0', 'score': 0.5762897, 'index': 313, 'word': ',', 'start': 1304, 'end': 1305}, {'entity': 'LABEL_1', 'score': 0.5169943, 'index': 314, 'word': '1998', 'start': 1306, 'end': 1310}, {'entity': 'LABEL_1', 'score': 0.5751032, 'index': 315, 'word': '.', 'start': 1310, 'end': 1311}, {'entity': 'LABEL_1', 'score': 0.61884916, 'index': 316, 'word': 'Michael', 'start': 1312, 'end': 1319}, {'entity': 'LABEL_1', 'score': 0.6418079, 'index': 317, 'word': 'Mali', 'start': 1320, 'end': 1324}, {'entity': 'LABEL_1', 'score': 0.5908292, 'index': 318, 'word': '##n', 'start': 1324, 'end': 1325}, {'entity': 'LABEL_1', 'score': 0.6004406, 'index': 319, 'word': 'and', 'start': 1326, 'end': 1329}, {'entity': 'LABEL_1', 'score': 0.5574665, 'index': 320, 'word': 'his', 'start': 1330, 'end': 1333}, {'entity': 'LABEL_1', 'score': 0.57928157, 'index': 321, 'word': 'Mars', 'start': 1334, 'end': 1338}, {'entity': 'LABEL_1', 'score': 0.54800403, 'index': 322, 'word': 'Or', 'start': 1339, 'end': 1341}, {'entity': 'LABEL_1', 'score': 0.52113616, 'index': 323, 'word': '##biter', 'start': 1341, 'end': 1346}, {'entity': 'LABEL_1', 'score': 0.5951806, 'index': 324, 'word': 'camera', 'start': 1347, 'end': 1353}, {'entity': 'LABEL_1', 'score': 0.6002175, 'index': 325, 'word': 'team', 'start': 1354, 'end': 1358}, {'entity': 'LABEL_0', 'score': 0.50268763, 
'index': 326, 'word': 'took', 'start': 1359, 'end': 1363}, {'entity': 'LABEL_0', 'score': 0.5686814, 'index': 327, 'word': 'a', 'start': 1364, 'end': 1365}, {'entity': 'LABEL_0', 'score': 0.567501, 'index': 328, 'word': 'picture', 'start': 1366, 'end': 1373}, {'entity': 'LABEL_0', 'score': 0.5302457, 'index': 329, 'word': 'that', 'start': 1374, 'end': 1378}, {'entity': 'LABEL_1', 'score': 0.5295077, 'index': 330, 'word': 'was', 'start': 1379, 'end': 1382}, {'entity': 'LABEL_0', 'score': 0.55978584, 'index': 331, 'word': 'ten', 'start': 1383, 'end': 1386}, {'entity': 'LABEL_0', 'score': 0.56129557, 'index': 332, 'word': 'times', 'start': 1387, 'end': 1392}, {'entity': 'LABEL_0', 'score': 0.5239482, 'index': 333, 'word': 'sharp', 'start': 1393, 'end': 1398}, {'entity': 'LABEL_0', 'score': 0.60185, 'index': 334, 'word': '##er', 'start': 1398, 'end': 1400}, {'entity': 'LABEL_0', 'score': 0.619771, 'index': 335, 'word': 'than', 'start': 1401, 'end': 1405}, {'entity': 'LABEL_0', 'score': 0.56753343, 'index': 336, 'word': 'the', 'start': 1406, 'end': 1409}, {'entity': 'LABEL_0', 'score': 0.5345476, 'index': 337, 'word': 'original', 'start': 1410, 'end': 1418}, {'entity': 'LABEL_0', 'score': 0.53111845, 'index': 338, 'word': 'Viking', 'start': 1419, 'end': 1425}, {'entity': 'LABEL_0', 'score': 0.51020545, 'index': 339, 'word': 'photos', 'start': 1426, 'end': 1432}, {'entity': 'LABEL_1', 'score': 0.5032632, 'index': 340, 'word': ',', 'start': 1432, 'end': 1433}, {'entity': 'LABEL_0', 'score': 0.5354122, 'index': 341, 'word': 'reveal', 'start': 1434, 'end': 1440}, {'entity': 'LABEL_0', 'score': 0.54571337, 'index': 342, 'word': '##ing', 'start': 1440, 'end': 1443}, {'entity': 'LABEL_0', 'score': 0.6040675, 'index': 343, 'word': 'a', 'start': 1444, 'end': 1445}, {'entity': 'LABEL_0', 'score': 0.5651683, 'index': 344, 'word': 'natural', 'start': 1446, 'end': 1453}, {'entity': 'LABEL_0', 'score': 0.54561985, 'index': 345, 'word': 'land', 'start': 1454, 'end': 1458}, {'entity': 
'LABEL_0', 'score': 0.5547739, 'index': 346, 'word': '##form', 'start': 1458, 'end': 1462}, {'entity': 'LABEL_0', 'score': 0.54526246, 'index': 347, 'word': ',', 'start': 1462, 'end': 1463}, {'entity': 'LABEL_0', 'score': 0.5390101, 'index': 348, 'word': 'which', 'start': 1464, 'end': 1469}, {'entity': 'LABEL_0', 'score': 0.5140612, 'index': 349, 'word': 'meant', 'start': 1470, 'end': 1475}, {'entity': 'LABEL_0', 'score': 0.5335286, 'index': 350, 'word': 'no', 'start': 1476, 'end': 1478}, {'entity': 'LABEL_1', 'score': 0.53740835, 'index': 351, 'word': 'alien', 'start': 1479, 'end': 1484}, {'entity': 'LABEL_0', 'score': 0.5383639, 'index': 352, 'word': 'monument', 'start': 1485, 'end': 1493}, {'entity': 'LABEL_0', 'score': 0.678678, 'index': 353, 'word': '.', 'start': 1493, 'end': 1494}, {'entity': 'LABEL_0', 'score': 0.5126649, 'index': 354, 'word': '\"', 'start': 1495, 'end': 1496}, {'entity': 'LABEL_1', 'score': 0.5785337, 'index': 355, 'word': 'But', 'start': 1496, 'end': 1499}, {'entity': 'LABEL_0', 'score': 0.51326215, 'index': 356, 'word': 'that', 'start': 1500, 'end': 1504}, {'entity': 'LABEL_0', 'score': 0.52528965, 'index': 357, 'word': 'picture', 'start': 1505, 'end': 1512}, {'entity': 'LABEL_1', 'score': 0.6512115, 'index': 358, 'word': 'wasn', 'start': 1513, 'end': 1517}, {'entity': 'LABEL_1', 'score': 0.60384643, 'index': 359, 'word': \"'\", 'start': 1517, 'end': 1518}, {'entity': 'LABEL_1', 'score': 0.59462476, 'index': 360, 'word': 't', 'start': 1518, 'end': 1519}, {'entity': 'LABEL_1', 'score': 0.63325346, 'index': 361, 'word': 'very', 'start': 1520, 'end': 1524}, {'entity': 'LABEL_1', 'score': 0.6392778, 'index': 362, 'word': 'clear', 'start': 1525, 'end': 1530}, {'entity': 'LABEL_1', 'score': 0.6274112, 'index': 363, 'word': 'at', 'start': 1531, 'end': 1533}, {'entity': 'LABEL_1', 'score': 0.63470787, 'index': 364, 'word': 'all', 'start': 1534, 'end': 1537}, {'entity': 'LABEL_1', 'score': 0.5834824, 'index': 365, 'word': ',', 'start': 1537, 
'end': 1538}, {'entity': 'LABEL_1', 'score': 0.6205755, 'index': 366, 'word': 'which', 'start': 1539, 'end': 1544}, {'entity': 'LABEL_1', 'score': 0.6719913, 'index': 367, 'word': 'could', 'start': 1545, 'end': 1550}, {'entity': 'LABEL_1', 'score': 0.62575376, 'index': 368, 'word': 'mean', 'start': 1551, 'end': 1555}, {'entity': 'LABEL_1', 'score': 0.5630169, 'index': 369, 'word': 'alien', 'start': 1556, 'end': 1561}, {'entity': 'LABEL_1', 'score': 0.51273495, 'index': 370, 'word': 'marking', 'start': 1562, 'end': 1569}, {'entity': 'LABEL_1', 'score': 0.5028247, 'index': 371, 'word': '##s', 'start': 1569, 'end': 1570}, {'entity': 'LABEL_1', 'score': 0.6543652, 'index': 372, 'word': 'were', 'start': 1571, 'end': 1575}, {'entity': 'LABEL_1', 'score': 0.5995173, 'index': 373, 'word': 'hidden', 'start': 1576, 'end': 1582}, {'entity': 'LABEL_1', 'score': 0.54646325, 'index': 374, 'word': 'by', 'start': 1583, 'end': 1585}, {'entity': 'LABEL_0', 'score': 0.53214496, 'index': 375, 'word': 'ha', 'start': 1586, 'end': 1588}, {'entity': 'LABEL_0', 'score': 0.5461479, 'index': 376, 'word': '##ze', 'start': 1588, 'end': 1590}, {'entity': 'LABEL_1', 'score': 0.50528497, 'index': 377, 'word': '\"', 'start': 1590, 'end': 1591}, {'entity': 'LABEL_1', 'score': 0.55964, 'index': 378, 'word': 'Well', 'start': 1592, 'end': 1596}, {'entity': 'LABEL_1', 'score': 0.50823045, 'index': 379, 'word': 'no', 'start': 1597, 'end': 1599}, {'entity': 'LABEL_1', 'score': 0.54237086, 'index': 380, 'word': ',', 'start': 1599, 'end': 1600}, {'entity': 'LABEL_1', 'score': 0.54179376, 'index': 381, 'word': 'ye', 'start': 1601, 'end': 1603}, {'entity': 'LABEL_1', 'score': 0.5154993, 'index': 382, 'word': '##s', 'start': 1603, 'end': 1604}, {'entity': 'LABEL_0', 'score': 0.54574096, 'index': 383, 'word': 'that', 'start': 1605, 'end': 1609}, {'entity': 'LABEL_0', 'score': 0.5339636, 'index': 384, 'word': 'rum', 'start': 1610, 'end': 1613}, {'entity': 'LABEL_0', 'score': 0.60298085, 'index': 385, 'word': 
'##or', 'start': 1613, 'end': 1615}, {'entity': 'LABEL_0', 'score': 0.5779869, 'index': 386, 'word': 'started', 'start': 1616, 'end': 1623}, {'entity': 'LABEL_1', 'score': 0.57263666, 'index': 387, 'word': ',', 'start': 1623, 'end': 1624}, {'entity': 'LABEL_1', 'score': 0.58315307, 'index': 388, 'word': 'but', 'start': 1625, 'end': 1628}, {'entity': 'LABEL_1', 'score': 0.5169765, 'index': 389, 'word': 'to', 'start': 1629, 'end': 1631}, {'entity': 'LABEL_1', 'score': 0.53323334, 'index': 390, 'word': 'prove', 'start': 1632, 'end': 1637}, {'entity': 'LABEL_1', 'score': 0.5946281, 'index': 391, 'word': 'them', 'start': 1638, 'end': 1642}, {'entity': 'LABEL_1', 'score': 0.6326935, 'index': 392, 'word': 'wrong', 'start': 1643, 'end': 1648}, {'entity': 'LABEL_0', 'score': 0.51224774, 'index': 393, 'word': 'on', 'start': 1649, 'end': 1651}, {'entity': 'LABEL_0', 'score': 0.50525755, 'index': 394, 'word': 'April', 'start': 1652, 'end': 1657}, {'entity': 'LABEL_0', 'score': 0.53439724, 'index': 395, 'word': '8', 'start': 1658, 'end': 1659}, {'entity': 'LABEL_0', 'score': 0.5542627, 'index': 396, 'word': ',', 'start': 1659, 'end': 1660}, {'entity': 'LABEL_1', 'score': 0.52382576, 'index': 397, 'word': '2001', 'start': 1661, 'end': 1665}, {'entity': 'LABEL_1', 'score': 0.67537016, 'index': 398, 'word': 'we', 'start': 1666, 'end': 1668}, {'entity': 'LABEL_1', 'score': 0.5359309, 'index': 399, 'word': 'decided', 'start': 1669, 'end': 1676}, {'entity': 'LABEL_1', 'score': 0.5115704, 'index': 400, 'word': 'to', 'start': 1677, 'end': 1679}, {'entity': 'LABEL_1', 'score': 0.51936895, 'index': 401, 'word': 'take', 'start': 1680, 'end': 1684}, {'entity': 'LABEL_1', 'score': 0.5070548, 'index': 402, 'word': 'another', 'start': 1685, 'end': 1692}, {'entity': 'LABEL_0', 'score': 0.5236533, 'index': 403, 'word': 'picture', 'start': 1693, 'end': 1700}, {'entity': 'LABEL_1', 'score': 0.57663405, 'index': 404, 'word': ',', 'start': 1700, 'end': 1701}, {'entity': 'LABEL_1', 'score': 
0.6072539, 'index': 405, 'word': 'making', 'start': 1702, 'end': 1708}, {'entity': 'LABEL_1', 'score': 0.5408605, 'index': 406, 'word': 'sure', 'start': 1709, 'end': 1713}, {'entity': 'LABEL_0', 'score': 0.538262, 'index': 407, 'word': 'it', 'start': 1714, 'end': 1716}, {'entity': 'LABEL_0', 'score': 0.5418676, 'index': 408, 'word': 'was', 'start': 1717, 'end': 1720}, {'entity': 'LABEL_0', 'score': 0.6419493, 'index': 409, 'word': 'a', 'start': 1721, 'end': 1722}, {'entity': 'LABEL_0', 'score': 0.61665875, 'index': 410, 'word': 'cloud', 'start': 1723, 'end': 1728}, {'entity': 'LABEL_0', 'score': 0.65615964, 'index': 411, 'word': '##less', 'start': 1728, 'end': 1732}, {'entity': 'LABEL_0', 'score': 0.65491146, 'index': 412, 'word': 'summer', 'start': 1733, 'end': 1739}, {'entity': 'LABEL_0', 'score': 0.68787396, 'index': 413, 'word': 'day', 'start': 1740, 'end': 1743}, {'entity': 'LABEL_0', 'score': 0.6984638, 'index': 414, 'word': '.', 'start': 1743, 'end': 1744}, {'entity': 'LABEL_1', 'score': 0.67899203, 'index': 415, 'word': 'Mali', 'start': 1745, 'end': 1749}, {'entity': 'LABEL_1', 'score': 0.6550556, 'index': 416, 'word': '##n', 'start': 1749, 'end': 1750}, {'entity': 'LABEL_1', 'score': 0.66404665, 'index': 417, 'word': \"'\", 'start': 1750, 'end': 1751}, {'entity': 'LABEL_1', 'score': 0.6497539, 'index': 418, 'word': 's', 'start': 1751, 'end': 1752}, {'entity': 'LABEL_1', 'score': 0.6664745, 'index': 419, 'word': 'team', 'start': 1753, 'end': 1757}, {'entity': 'LABEL_1', 'score': 0.5993111, 'index': 420, 'word': 'captured', 'start': 1758, 'end': 1766}, {'entity': 'LABEL_1', 'score': 0.54670244, 'index': 421, 'word': 'an', 'start': 1767, 'end': 1769}, {'entity': 'LABEL_1', 'score': 0.5774647, 'index': 422, 'word': 'ama', 'start': 1770, 'end': 1773}, {'entity': 'LABEL_1', 'score': 0.561597, 'index': 423, 'word': '##zing', 'start': 1773, 'end': 1777}, {'entity': 'LABEL_1', 'score': 0.5217397, 'index': 424, 'word': 'photo', 'start': 1778, 'end': 1783}, 
{'entity': 'LABEL_1', 'score': 0.5875206, 'index': 425, 'word': 'using', 'start': 1784, 'end': 1789}, {'entity': 'LABEL_1', 'score': 0.5692844, 'index': 426, 'word': 'the', 'start': 1790, 'end': 1793}, {'entity': 'LABEL_1', 'score': 0.5843616, 'index': 427, 'word': 'camera', 'start': 1794, 'end': 1800}, {'entity': 'LABEL_1', 'score': 0.5179869, 'index': 428, 'word': \"'\", 'start': 1800, 'end': 1801}, {'entity': 'LABEL_1', 'score': 0.50458616, 'index': 429, 'word': 's', 'start': 1801, 'end': 1802}, {'entity': 'LABEL_1', 'score': 0.50684035, 'index': 430, 'word': 'absolute', 'start': 1803, 'end': 1811}, {'entity': 'LABEL_1', 'score': 0.52807856, 'index': 431, 'word': 'maximum', 'start': 1812, 'end': 1819}, {'entity': 'LABEL_1', 'score': 0.51326877, 'index': 432, 'word': 'revolution', 'start': 1820, 'end': 1830}, {'entity': 'LABEL_1', 'score': 0.55913496, 'index': 433, 'word': '.', 'start': 1830, 'end': 1831}, {'entity': 'LABEL_1', 'score': 0.6561321, 'index': 434, 'word': 'With', 'start': 1832, 'end': 1836}, {'entity': 'LABEL_1', 'score': 0.6042771, 'index': 435, 'word': 'this', 'start': 1837, 'end': 1841}, {'entity': 'LABEL_1', 'score': 0.62364846, 'index': 436, 'word': 'camera', 'start': 1842, 'end': 1848}, {'entity': 'LABEL_1', 'score': 0.7165416, 'index': 437, 'word': 'you', 'start': 1849, 'end': 1852}, {'entity': 'LABEL_1', 'score': 0.72462106, 'index': 438, 'word': 'can', 'start': 1853, 'end': 1856}, {'entity': 'LABEL_1', 'score': 0.5962723, 'index': 439, 'word': 'disc', 'start': 1857, 'end': 1861}, {'entity': 'LABEL_1', 'score': 0.5619629, 'index': 440, 'word': '##ern', 'start': 1861, 'end': 1864}, {'entity': 'LABEL_1', 'score': 0.6288542, 'index': 441, 'word': 'things', 'start': 1865, 'end': 1871}, {'entity': 'LABEL_1', 'score': 0.6048734, 'index': 442, 'word': 'in', 'start': 1872, 'end': 1874}, {'entity': 'LABEL_1', 'score': 0.58315647, 'index': 443, 'word': 'a', 'start': 1875, 'end': 1876}, {'entity': 'LABEL_1', 'score': 0.6455242, 'index': 444, 'word': 
'digital', 'start': 1877, 'end': 1884}, {'entity': 'LABEL_1', 'score': 0.61327535, 'index': 445, 'word': 'image', 'start': 1885, 'end': 1890}, {'entity': 'LABEL_1', 'score': 0.6229006, 'index': 446, 'word': ',', 'start': 1890, 'end': 1891}, {'entity': 'LABEL_1', 'score': 0.5079185, 'index': 447, 'word': '3', 'start': 1892, 'end': 1893}, {'entity': 'LABEL_1', 'score': 0.5326037, 'index': 448, 'word': 'times', 'start': 1894, 'end': 1899}, {'entity': 'LABEL_1', 'score': 0.53250307, 'index': 449, 'word': 'bigger', 'start': 1900, 'end': 1906}, {'entity': 'LABEL_0', 'score': 0.5356584, 'index': 450, 'word': 'than', 'start': 1907, 'end': 1911}, {'entity': 'LABEL_0', 'score': 0.5484522, 'index': 451, 'word': 'the', 'start': 1912, 'end': 1915}, {'entity': 'LABEL_1', 'score': 0.5069752, 'index': 452, 'word': 'pi', 'start': 1916, 'end': 1918}, {'entity': 'LABEL_1', 'score': 0.53663635, 'index': 453, 'word': '##xel', 'start': 1918, 'end': 1921}, {'entity': 'LABEL_0', 'score': 0.5043374, 'index': 454, 'word': 'size', 'start': 1922, 'end': 1926}, {'entity': 'LABEL_1', 'score': 0.50889504, 'index': 455, 'word': 'which', 'start': 1927, 'end': 1932}, {'entity': 'LABEL_1', 'score': 0.5591767, 'index': 456, 'word': 'means', 'start': 1933, 'end': 1938}, {'entity': 'LABEL_1', 'score': 0.6113632, 'index': 457, 'word': 'if', 'start': 1939, 'end': 1941}, {'entity': 'LABEL_0', 'score': 0.5393998, 'index': 458, 'word': 'there', 'start': 1942, 'end': 1947}, {'entity': 'LABEL_1', 'score': 0.5202561, 'index': 459, 'word': 'were', 'start': 1948, 'end': 1952}, {'entity': 'LABEL_1', 'score': 0.5006454, 'index': 460, 'word': 'any', 'start': 1953, 'end': 1956}, {'entity': 'LABEL_0', 'score': 0.54660577, 'index': 461, 'word': 'signs', 'start': 1957, 'end': 1962}, {'entity': 'LABEL_0', 'score': 0.6116034, 'index': 462, 'word': 'of', 'start': 1963, 'end': 1965}, {'entity': 'LABEL_0', 'score': 0.6801252, 'index': 463, 'word': 'life', 'start': 1966, 'end': 1970}, {'entity': 'LABEL_1', 'score': 
0.52473795, 'index': 464, 'word': ',', 'start': 1970, 'end': 1971}, {'entity': 'LABEL_1', 'score': 0.69474596, 'index': 465, 'word': 'you', 'start': 1972, 'end': 1975}, {'entity': 'LABEL_1', 'score': 0.7024561, 'index': 466, 'word': 'could', 'start': 1976, 'end': 1981}, {'entity': 'LABEL_1', 'score': 0.63681555, 'index': 467, 'word': 'easily', 'start': 1982, 'end': 1988}, {'entity': 'LABEL_1', 'score': 0.5592056, 'index': 468, 'word': 'see', 'start': 1989, 'end': 1992}, {'entity': 'LABEL_1', 'score': 0.5347693, 'index': 469, 'word': 'what', 'start': 1993, 'end': 1997}, {'entity': 'LABEL_1', 'score': 0.55739254, 'index': 470, 'word': 'they', 'start': 1998, 'end': 2002}, {'entity': 'LABEL_1', 'score': 0.577428, 'index': 471, 'word': 'were', 'start': 2003, 'end': 2007}, {'entity': 'LABEL_0', 'score': 0.50032717, 'index': 472, 'word': '.', 'start': 2007, 'end': 2008}, {'entity': 'LABEL_1', 'score': 0.5744039, 'index': 473, 'word': 'What', 'start': 2009, 'end': 2013}, {'entity': 'LABEL_1', 'score': 0.51382345, 'index': 474, 'word': 'the', 'start': 2014, 'end': 2017}, {'entity': 'LABEL_1', 'score': 0.5102362, 'index': 475, 'word': 'picture', 'start': 2018, 'end': 2025}, {'entity': 'LABEL_1', 'score': 0.5715643, 'index': 476, 'word': 'showed', 'start': 2026, 'end': 2032}, {'entity': 'LABEL_1', 'score': 0.54575837, 'index': 477, 'word': 'was', 'start': 2033, 'end': 2036}, {'entity': 'LABEL_0', 'score': 0.604859, 'index': 478, 'word': 'the', 'start': 2037, 'end': 2040}, {'entity': 'LABEL_0', 'score': 0.5699074, 'index': 479, 'word': 'but', 'start': 2041, 'end': 2044}, {'entity': 'LABEL_0', 'score': 0.5952769, 'index': 480, 'word': '##te', 'start': 2044, 'end': 2046}, {'entity': 'LABEL_0', 'score': 0.6050275, 'index': 481, 'word': 'or', 'start': 2047, 'end': 2049}, {'entity': 'LABEL_0', 'score': 0.53718954, 'index': 482, 'word': 'mesa', 'start': 2050, 'end': 2054}, {'entity': 'LABEL_0', 'score': 0.54129803, 'index': 483, 'word': ',', 'start': 2054, 'end': 2055}, {'entity': 
'LABEL_1', 'score': 0.5079675, 'index': 484, 'word': 'which', 'start': 2056, 'end': 2061}, {'entity': 'LABEL_0', 'score': 0.5088303, 'index': 485, 'word': 'are', 'start': 2062, 'end': 2065}, {'entity': 'LABEL_0', 'score': 0.552098, 'index': 486, 'word': 'land', 'start': 2066, 'end': 2070}, {'entity': 'LABEL_0', 'score': 0.5434629, 'index': 487, 'word': '##form', 'start': 2070, 'end': 2074}, {'entity': 'LABEL_0', 'score': 0.56481916, 'index': 488, 'word': '##s', 'start': 2074, 'end': 2075}, {'entity': 'LABEL_0', 'score': 0.52328765, 'index': 489, 'word': 'common', 'start': 2076, 'end': 2082}, {'entity': 'LABEL_0', 'score': 0.5769157, 'index': 490, 'word': 'around', 'start': 2083, 'end': 2089}, {'entity': 'LABEL_0', 'score': 0.58156013, 'index': 491, 'word': 'the', 'start': 2090, 'end': 2093}, {'entity': 'LABEL_0', 'score': 0.5761414, 'index': 492, 'word': 'American', 'start': 2094, 'end': 2102}, {'entity': 'LABEL_0', 'score': 0.60019207, 'index': 493, 'word': 'West', 'start': 2103, 'end': 2107}, {'entity': 'LABEL_0', 'score': 0.56151944, 'index': 494, 'word': '.', 'start': 2107, 'end': 2108}]\n" ] } ], "source": [ "import json\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"numind/NuNER-multilingual-v0.1\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"numind/NuNER-multilingual-v0.1\")\n", "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "salida=classifier(text)\n", "print(salida)" ] }, { "cell_type": "code", "execution_count": 51, "id": "10ba9112-0180-4cd3-8d4b-b548101382fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "LABEL_0 188\n", "LABEL_1 306\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "LABEL_0 1\n", " ##ara 1\n", " ##con 1\n", " ##er 1\n", " ##form 3\n", " ##ian 1\n", " ##ing 1\n", " ##ion 1\n", " ##less 1\n", " ##ling 1\n", " ##oce 1\n", " ##oh 1\n", " ##onia 1\n", " ##or 1\n", " ##out 1\n", " ##rc 
1\n", " ##ry 1\n", " ##s 3\n", " ##te 1\n", " ##yd 1\n", " ##zation 1\n", " ##ze 1\n", " , 8\n", " . 9\n", " 1 1\n", " 25 1\n", " 5 1\n", " 8 1\n", " American 1\n", " April 2\n", " C 1\n", " Egypt 1\n", " Mars 4\n", " Mart 1\n", " Ph 1\n", " Very 1\n", " Viking 1\n", " West 1\n", " a 6\n", " about 1\n", " ago 1\n", " alien 1\n", " an 1\n", " ancient 1\n", " another 1\n", " are 1\n", " around 2\n", " attention 1\n", " books 1\n", " but 1\n", " by 1\n", " check 1\n", " ci 1\n", " civili 1\n", " cloud 1\n", " common 2\n", " day 1\n", " days 1\n", " evidence 1\n", " few 1\n", " five 1\n", " for 1\n", " formation 1\n", " good 1\n", " gr 1\n", " ha 1\n", " huge 1\n", " i 1\n", " in 1\n", " is 1\n", " it 3\n", " land 3\n", " later 1\n", " life 3\n", " like 1\n", " lines 1\n", " look 1\n", " magazines 1\n", " meant 1\n", " mesa 2\n", " monument 1\n", " natural 2\n", " no 1\n", " note 1\n", " of 4\n", " on 4\n", " or 1\n", " original 1\n", " photos 1\n", " picture 3\n", " planet 1\n", " pop 1\n", " public 1\n", " radio 1\n", " reveal 1\n", " rock 1\n", " rum 1\n", " sharp 1\n", " shows 1\n", " signs 1\n", " size 1\n", " spacecraft 1\n", " started 1\n", " store 1\n", " story 1\n", " summer 1\n", " sure 1\n", " talk 1\n", " ten 1\n", " than 2\n", " that 5\n", " the 10\n", " there 3\n", " times 1\n", " to 3\n", " took 1\n", " twenty 1\n", " was 3\n", " way 1\n", " which 1\n", " years 2\n", "LABEL_1 8\n", " ##adow 3\n", " ##biter 1\n", " ##ct 1\n", " ##d 2\n", " ##emble 1\n", " ##ern 1\n", " ##ly 1\n", " ##n 3\n", " ##ness 1\n", " ##nted 1\n", " ##pping 1\n", " ##rra 1\n", " ##s 4\n", " ##xel 1\n", " ##y 1\n", " ##zing 1\n", " , 26\n", " - 2\n", " . 3\n", " 1998 1\n", " 2001 1\n", " 3 1\n", " ; 1\n", " ? 
1\n", " But 1\n", " Face 1\n", " Mali 2\n", " Mars 3\n", " Michael 1\n", " NASA 3\n", " No 1\n", " On 1\n", " Or 1\n", " So 1\n", " Some 1\n", " The 1\n", " Us 1\n", " Viking 1\n", " We 2\n", " Well 1\n", " What 1\n", " With 1\n", " a 4\n", " able 1\n", " absolute 1\n", " alien 2\n", " all 3\n", " ama 1\n", " an 1\n", " and 8\n", " announced 1\n", " another 2\n", " any 1\n", " appeared 1\n", " at 2\n", " be 2\n", " became 1\n", " because 1\n", " bigger 1\n", " budget 1\n", " but 3\n", " by 2\n", " camera 3\n", " can 1\n", " captured 1\n", " clear 1\n", " correct 1\n", " could 2\n", " created 1\n", " decided 2\n", " defender 1\n", " did 1\n", " digital 1\n", " disc 1\n", " easily 1\n", " engage 1\n", " evidence 1\n", " face 4\n", " figure 1\n", " findings 1\n", " for 1\n", " formed 1\n", " had 1\n", " hau 1\n", " head 1\n", " hidden 1\n", " hide 1\n", " his 1\n", " human 2\n", " if 2\n", " image 2\n", " in 2\n", " is 1\n", " it 7\n", " just 3\n", " like 1\n", " made 2\n", " make 1\n", " making 1\n", " marking 1\n", " maximum 1\n", " me 1\n", " mean 1\n", " means 1\n", " movies 1\n", " no 1\n", " obvious 1\n", " of 1\n", " on 2\n", " one 1\n", " only 2\n", " our 1\n", " out 1\n", " people 1\n", " photo 1\n", " photos 1\n", " pi 1\n", " picture 1\n", " prove 1\n", " re 1\n", " really 1\n", " res 1\n", " revealed 1\n", " revolution 1\n", " s 3\n", " scientist 1\n", " scientists 2\n", " see 2\n", " sh 3\n", " shot 2\n", " should 1\n", " showed 1\n", " sna 1\n", " soon 1\n", " spotted 1\n", " sure 2\n", " t 2\n", " take 2\n", " team 2\n", " tell 1\n", " that 5\n", " the 6\n", " them 1\n", " they 1\n", " things 1\n", " this 2\n", " thought 2\n", " times 1\n", " to 7\n", " us 1\n", " using 1\n", " very 1\n", " wanted 1\n", " was 7\n", " wasn 1\n", " we 5\n", " were 4\n", " what 1\n", " when 1\n", " which 4\n", " whole 1\n", " wish 1\n", " with 1\n", " would 1\n", " wrong 2\n", " ye 1\n", " you 4\n", "dtype: int64" ] }, "execution_count": 51, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "with open(\"29 numindNuNER-multilingual-v0.1.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "3c3e4378-8c3b-4848-b7d5-8e370ae393ee", "metadata": {}, "source": [ "## 30 orgcatorg/bert-base-multilingual-cased-ner" ] }, { "cell_type": "code", "execution_count": 53, "id": "c780e0af-aa32-44a2-b519-dea67c5b5788", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.99468493,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-ORG',\n", " 'score': 0.99335337,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9839153,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9820802,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9102487,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-PER',\n", " 'score': 0.8671783,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'B-PER',\n", " 'score': 0.7123952,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-PER',\n", " 'score': 0.94853634,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'I-PER',\n", " 'score': 0.96879715,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'I-PER',\n", " 'score': 0.9677627,\n", " 'index': 125,\n", " 'word': '##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-PER',\n", " 'score': 0.9969103,\n", " 'index': 316,\n", " 'word': 
'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.99837106,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PER',\n", " 'score': 0.9981609,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9981694,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-ORG',\n", " 'score': 0.99742895,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9966016,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9417412,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.96071965,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/bert-base-multilingual-cased-ner\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/bert-base-multilingual-cased-ner\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 55, "id": "36bc9e11-1524-4e9f-a21b-e5dea172a26f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 4\n", "B-ORG 2\n", "B-PER 3\n", "I-LOC 1\n", "I-ORG 3\n", "I-PER 5\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC ##onia 1\n", " ##yd 1\n", " American 1\n", " C 1\n", "B-ORG Mars 1\n", " Viking 1\n", "B-PER ##ion 1\n", " Egypt 1\n", " 
Michael 1\n", "I-LOC West 1\n", "I-ORG ##biter 1\n", " 1 1\n", " Or 1\n", "I-PER ##ara 1\n", " ##n 1\n", " ##oh 1\n", " Mali 1\n", " Ph 1\n", "dtype: int64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"30 orgcatorgbert-base-multilingual-cased-ner.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "01668401-aae4-4edb-beca-5158043923da", "metadata": {}, "source": [ "## 31 orgcatorg/xlm-roberta-base-ner" ] }, { "cell_type": "code", "execution_count": 57, "id": "7218c33d-66a7-4aee-a1d6-aab2d0f53b4a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.9307573,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-ORG',\n", " 'score': 0.55268466,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'B-ORG',\n", " 'score': 0.8743138,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8924012,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8749563,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': 'I-ORG',\n", " 'score': 0.83837587,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': 'B-ORG',\n", " 'score': 0.31897616,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'B-LOC',\n", " 'score': 0.66254807,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'B-LOC',\n", " 'score': 0.6462039,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " 
{'entity': 'B-LOC',\n", " 'score': 0.6156382,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'B-ORG',\n", " 'score': 0.5166316,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'B-ORG',\n", " 'score': 0.5068441,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-ORG',\n", " 'score': 0.51523536,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'I-ORG',\n", " 'score': 0.5124219,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'I-ORG',\n", " 'score': 0.54218507,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-PER',\n", " 'score': 0.9100919,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.89008635,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'B-ORG',\n", " 'score': 0.8640533,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-ORG',\n", " 'score': 0.913112,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-ORG',\n", " 'score': 0.88857234,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'I-ORG',\n", " 'score': 0.85235375,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'B-LOC',\n", " 'score': 0.50641507,\n", " 'index': 478,\n", " 'word': '▁but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'B-LOC',\n", " 'score': 0.8740346,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.78719395,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 
2103,\n", " 'end': 2107}]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/xlm-roberta-base-ner\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/xlm-roberta-base-ner\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 58, "id": "c54964d5-041e-4fc7-989f-07f45279adaf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 5\n", "B-ORG 6\n", "B-PER 1\n", "I-LOC 1\n", "I-ORG 10\n", "I-PER 1\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC do 1\n", " nia 1\n", " ▁American 1\n", " ▁Cy 1\n", " ▁but 1\n", "B-ORG ion 1\n", " ▁Egypt 1\n", " ▁Mars 1\n", " ▁Marti 1\n", " ▁NASA 1\n", " ▁Viking 1\n", "B-PER ▁Michael 1\n", "I-LOC ▁West 1\n", "I-ORG a 1\n", " bit 1\n", " craft 1\n", " er 1\n", " oh 1\n", " ▁1 1\n", " ▁Or 1\n", " ▁Phar 1\n", " ▁scientist 1\n", " ▁space 1\n", "I-PER ▁Malin 1\n", "dtype: int64" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(\"31 orgcatorgxlm-roberta-base-ner.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "55c388f1-41aa-451a-928e-41acfe12639d", "metadata": {}, "source": [ "## 32 orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base" ] }, { "cell_type": "code", "execution_count": 60, "id": "34af1035-7a09-485b-9663-057506e508ac", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings 
are fine-tuned or trained.\n", "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.9218857,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9510029,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8217206,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'B-ORG',\n", " 'score': 0.96086377,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9884774,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9854343,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': 'I-ORG',\n", " 'score': 0.97897524,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': 'B-ORG',\n", " 'score': 0.8928362,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'B-ORG',\n", " 'score': 0.83326113,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8212351,\n", " 'index': 99,\n", " 'word': '▁mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'B-ORG',\n", " 'score': 0.86985016,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'B-ORG',\n", " 'score': 0.87044686,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-ORG',\n", " 'score': 0.70206004,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8018863,\n", " 
'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8662057,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-PER',\n", " 'score': 0.8813573,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.90332484,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8650216,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-ORG',\n", " 'score': 0.79992837,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'I-ORG',\n", " 'score': 0.8160805,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346}]" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"orgcatorg/EntityCS-39-PEP_MS_MLM-xlmr-base\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 61, "id": "8f0dab43-9fd8-4a0f-a3b4-c936f4ab9d25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-ORG 6\n", "B-PER 1\n", "I-ORG 12\n", "I-PER 1\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-ORG an 1\n", " ion 1\n", " ▁Egypt 1\n", " ▁Marti 1\n", " ▁NASA 1\n", " ▁Viking 1\n", "B-PER ▁Michael 1\n", "I-ORG a 1\n", " bit 1\n", " craft 1\n", " er 1\n", " oh 1\n", " ▁1 1\n", " ▁Mars 1\n", " ▁Or 1\n", " ▁Phar 1\n", " ▁mesa 1\n", " ▁scientist 1\n", " ▁space 1\n", 
"I-PER ▁Malin 1\n", "dtype: int64" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "with open(\"32 orgcatorgEntityCS-39-PEP_MS_MLM-xlmr-base.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "4b343a33-2ae7-441a-8ff2-7b10dcd5611d", "metadata": {}, "source": [ "## 33 igorsterner/xlmr-multilingual-sentence-segmentation" ] }, { "cell_type": "code", "execution_count": 63, "id": "3ae7c05d-6f10-4666-92dc-6a3483c40afb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': '|',\n", " 'score': 0.9995815,\n", " 'index': 49,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': '|',\n", " 'score': 0.9974347,\n", " 'index': 85,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': '|',\n", " 'score': 0.9996898,\n", " 'index': 124,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': '|',\n", " 'score': 0.9997476,\n", " 'index': 174,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': '|',\n", " 'score': 0.9997371,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': '|',\n", " 'score': 0.9997018,\n", " 'index': 251,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': '|',\n", " 'score': 0.9995659,\n", " 'index': 296,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': '|',\n", " 'score': 0.9988242,\n", " 'index': 356,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': '|',\n", " 'score': 0.9983346,\n", " 'index': 380,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': '|',\n", " 'score': 0.999102,\n", " 'index': 415,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': '|',\n", " 
'score': 0.99834883,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': '|',\n", " 'score': 0.99967754,\n", " 'index': 471,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': '|',\n", " 'score': 0.99990845,\n", " 'index': 493,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"igorsterner/xlmr-multilingual-sentence-segmentation\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"igorsterner/xlmr-multilingual-sentence-segmentation\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 65, "id": "32e7cb9f-a799-4227-a642-ef9af8602b15", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "| 13\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word\n", "| 1\n", " . 11\n", " ? 
1\n", "dtype: int64" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "with open(\"33 igorsternerxlmr-multilingual-sentence-segmentation.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "1fbccdc4-eafd-485d-bdd0-500d606e6429", "metadata": {}, "source": [ "## 34 mukowaty/punctuate-16" ] }, { "cell_type": "code", "execution_count": 67, "id": "997199dd-2e7f-43ee-8dc4-b5189ef1da47", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. 
Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': '0',\n", " 'score': 0.77220947,\n", " 'index': 1,\n", " 'word': '▁So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': '0',\n", " 'score': 0.9685962,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': '0',\n", " 'score': 0.99998283,\n", " 'index': 3,\n", " 'word': '▁if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': '0',\n", " 'score': 0.99998665,\n", " 'index': 4,\n", " 'word': '▁you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': '0',\n", " 'score': 0.99998367,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': '0',\n", " 'score': 0.99998736,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': '0',\n", " 'score': 0.99998903,\n", " 'index': 7,\n", " 'word': '▁a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': '0',\n", " 'score': 0.9999871,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': '0',\n", " 'score': 0.8388357,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': '0',\n", " 'score': 0.9971527,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': '0',\n", " 'score': 0.9999814,\n", " 'index': 11,\n", " 'word': '▁you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': '0',\n", " 'score': 0.999954,\n", " 'index': 12,\n", " 'word': '▁should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': '0',\n", " 'score': 0.9999802,\n", " 'index': 13,\n", " 'word': '▁be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': '0',\n", " 'score': 0.99997485,\n", " 'index': 14,\n", " 'word': '▁able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': '0',\n", " 'score': 0.9999815,\n", " 'index': 15,\n", " 'word': '▁to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': '0',\n", " 'score': 0.9999883,\n", " 'index': 
16,\n", " 'word': '▁tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': '0',\n", " 'score': 0.9998753,\n", " 'index': 17,\n", " 'word': '▁me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': '0',\n", " 'score': 0.9999894,\n", " 'index': 18,\n", " 'word': '▁the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': '0',\n", " 'score': 0.9999883,\n", " 'index': 19,\n", " 'word': '▁whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': '0',\n", " 'score': 0.9985863,\n", " 'index': 20,\n", " 'word': '▁story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': '0',\n", " 'score': 0.99981517,\n", " 'index': 21,\n", " 'word': '▁about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': '0',\n", " 'score': 0.9999635,\n", " 'index': 22,\n", " 'word': '▁the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': '0',\n", " 'score': 0.9999056,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': '0',\n", " 'score': 0.9999746,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': '0',\n", " 'score': 0.92461413,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': '0',\n", " 'score': 0.98427933,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': '0',\n", " 'score': 0.85343385,\n", " 'index': 27,\n", " 'word': '▁which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': '0',\n", " 'score': 0.837885,\n", " 'index': 28,\n", " 'word': '▁obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': '0',\n", " 'score': 0.9999728,\n", " 'index': 29,\n", " 'word': '▁is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': '0',\n", " 'score': 0.9998778,\n", " 'index': 30,\n", " 'word': '▁evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': '0',\n", " 'score': 0.9999541,\n", " 'index': 31,\n", " 'word': '▁that',\n", " 'start': 130,\n", " 'end': 134},\n", " 
{'entity': '0',\n", " 'score': 0.9999889,\n", " 'index': 32,\n", " 'word': '▁there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': '0',\n", " 'score': 0.9999783,\n", " 'index': 33,\n", " 'word': '▁is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': '0',\n", " 'score': 0.9999826,\n", " 'index': 34,\n", " 'word': '▁life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': '0',\n", " 'score': 0.9999865,\n", " 'index': 35,\n", " 'word': '▁on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': '0',\n", " 'score': 0.9567498,\n", " 'index': 36,\n", " 'word': '▁Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': '0',\n", " 'score': 0.9929469,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': '0',\n", " 'score': 0.99991345,\n", " 'index': 38,\n", " 'word': '▁and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': '0',\n", " 'score': 0.9999398,\n", " 'index': 39,\n", " 'word': '▁that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': '0',\n", " 'score': 0.99998796,\n", " 'index': 40,\n", " 'word': '▁the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': '0',\n", " 'score': 0.9999676,\n", " 'index': 41,\n", " 'word': '▁face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': '0',\n", " 'score': 0.99997556,\n", " 'index': 42,\n", " 'word': '▁was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': '0',\n", " 'score': 0.9999752,\n", " 'index': 43,\n", " 'word': '▁created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': '0',\n", " 'score': 0.99997556,\n", " 'index': 44,\n", " 'word': '▁by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': '0',\n", " 'score': 0.6871036,\n", " 'index': 45,\n", " 'word': '▁alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': '0',\n", " 'score': 0.718696,\n", " 'index': 46,\n", " 'word': 's',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': '0',\n", " 'score': 0.9882825,\n", " 'index': 47,\n", " 
'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': '0',\n", " 'score': 0.7325032,\n", " 'index': 48,\n", " 'word': '▁correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': '.',\n", " 'score': 0.6800837,\n", " 'index': 49,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': ',',\n", " 'score': 0.630742,\n", " 'index': 50,\n", " 'word': '▁No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': '0',\n", " 'score': 0.537651,\n", " 'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': '0',\n", " 'score': 0.9979886,\n", " 'index': 52,\n", " 'word': '▁twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': '0',\n", " 'score': 0.9999819,\n", " 'index': 53,\n", " 'word': '▁five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': '0',\n", " 'score': 0.99998283,\n", " 'index': 54,\n", " 'word': '▁years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': '0',\n", " 'score': 0.9173825,\n", " 'index': 55,\n", " 'word': '▁ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': '0',\n", " 'score': 0.82203543,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': '0',\n", " 'score': 0.9999807,\n", " 'index': 57,\n", " 'word': '▁our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': '0',\n", " 'score': 0.99979943,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': '0',\n", " 'score': 0.99969375,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': '0',\n", " 'score': 0.9997416,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': '0',\n", " 'score': 0.9997236,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': '0',\n", " 'score': 0.99925953,\n", " 'index': 62,\n", " 'word': '▁was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 
'0',\n", " 'score': 0.99998724,\n", " 'index': 63,\n", " 'word': '▁circ',\n", " 'start': 264,\n", " 'end': 268},\n", " {'entity': '0',\n", " 'score': 0.9999875,\n", " 'index': 64,\n", " 'word': 'ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': '0',\n", " 'score': 0.9999896,\n", " 'index': 65,\n", " 'word': '▁the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': '0',\n", " 'score': 0.9864899,\n", " 'index': 66,\n", " 'word': '▁planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': '0',\n", " 'score': 0.9984669,\n", " 'index': 67,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': '0',\n", " 'score': 0.9999856,\n", " 'index': 68,\n", " 'word': '▁sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': '0',\n", " 'score': 0.99998784,\n", " 'index': 69,\n", " 'word': 'pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': '0',\n", " 'score': 0.95048034,\n", " 'index': 70,\n", " 'word': '▁photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': '0',\n", " 'score': 0.9701614,\n", " 'index': 71,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': '0',\n", " 'score': 0.9999386,\n", " 'index': 72,\n", " 'word': '▁when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': '0',\n", " 'score': 0.9999896,\n", " 'index': 73,\n", " 'word': '▁it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': '0',\n", " 'score': 0.9999639,\n", " 'index': 74,\n", " 'word': '▁spot',\n", " 'start': 310,\n", " 'end': 314},\n", " {'entity': '0',\n", " 'score': 0.9999621,\n", " 'index': 75,\n", " 'word': 'ted',\n", " 'start': 314,\n", " 'end': 317},\n", " {'entity': '0',\n", " 'score': 0.9999682,\n", " 'index': 76,\n", " 'word': '▁the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': '0',\n", " 'score': 0.9999527,\n", " 'index': 77,\n", " 'word': '▁shadow',\n", " 'start': 322,\n", " 'end': 328},\n", " {'entity': '0',\n", " 'score': 0.99995685,\n", " 'index': 78,\n", " 'word': 
'y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': '0',\n", " 'score': 0.9999789,\n", " 'index': 79,\n", " 'word': '▁like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': '0',\n", " 'score': 0.9999783,\n", " 'index': 80,\n", " 'word': 'ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': '0',\n", " 'score': 0.99998283,\n", " 'index': 81,\n", " 'word': '▁of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': '0',\n", " 'score': 0.9999856,\n", " 'index': 82,\n", " 'word': '▁a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': '0',\n", " 'score': 0.99998474,\n", " 'index': 83,\n", " 'word': '▁human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': '0',\n", " 'score': 0.9708572,\n", " 'index': 84,\n", " 'word': '▁face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': '0',\n", " 'score': 0.98933613,\n", " 'index': 85,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': '0',\n", " 'score': 0.9997725,\n", " 'index': 86,\n", " 'word': '▁Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': '0',\n", " 'score': 0.9999485,\n", " 'index': 87,\n", " 'word': '▁scientist',\n", " 'start': 359,\n", " 'end': 368},\n", " {'entity': '0',\n", " 'score': 0.999949,\n", " 'index': 88,\n", " 'word': 's',\n", " 'start': 368,\n", " 'end': 369},\n", " {'entity': '0',\n", " 'score': 0.9999881,\n", " 'index': 89,\n", " 'word': '▁figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': '0',\n", " 'score': 0.9999857,\n", " 'index': 90,\n", " 'word': 'd',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': '0',\n", " 'score': 0.999859,\n", " 'index': 91,\n", " 'word': '▁out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': '0',\n", " 'score': 0.9994584,\n", " 'index': 92,\n", " 'word': '▁that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': '0',\n", " 'score': 0.9999875,\n", " 'index': 93,\n", " 'word': '▁it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': '0',\n", " 
'score': 0.99992657,\n", " 'index': 94,\n", " 'word': '▁was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': '0',\n", " 'score': 0.9998423,\n", " 'index': 95,\n", " 'word': '▁just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': '0',\n", " 'score': 0.99845207,\n", " 'index': 96,\n", " 'word': '▁another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': '0',\n", " 'score': 0.99955803,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': '0',\n", " 'score': 0.99972445,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': '0',\n", " 'score': 0.95585614,\n", " 'index': 99,\n", " 'word': '▁mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': '0',\n", " 'score': 0.94397455,\n", " 'index': 100,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': '0',\n", " 'score': 0.99993527,\n", " 'index': 101,\n", " 'word': '▁common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': '0',\n", " 'score': 0.99996245,\n", " 'index': 102,\n", " 'word': '▁around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': '0',\n", " 'score': 0.856652,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': '0',\n", " 'score': 0.8526323,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': '0',\n", " 'score': 0.83899707,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': '0',\n", " 'score': 0.97646606,\n", " 'index': 106,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': '0',\n", " 'score': 0.9839803,\n", " 'index': 107,\n", " 'word': '▁only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': '0',\n", " 'score': 0.99994636,\n", " 'index': 108,\n", " 'word': '▁this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': '0',\n", " 'score': 0.99917173,\n", " 'index': 109,\n", " 'word': 
'▁one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': '0',\n", " 'score': 0.9998348,\n", " 'index': 110,\n", " 'word': '▁had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': '0',\n", " 'score': 0.9699841,\n", " 'index': 111,\n", " 'word': '▁shadow',\n", " 'start': 462,\n", " 'end': 468},\n", " {'entity': '0',\n", " 'score': 0.9711239,\n", " 'index': 112,\n", " 'word': 's',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': '0',\n", " 'score': 0.9998492,\n", " 'index': 113,\n", " 'word': '▁that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': '0',\n", " 'score': 0.99998796,\n", " 'index': 114,\n", " 'word': '▁made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': '0',\n", " 'score': 0.99998903,\n", " 'index': 115,\n", " 'word': '▁it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': '0',\n", " 'score': 0.9999857,\n", " 'index': 116,\n", " 'word': '▁look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': '0',\n", " 'score': 0.9999777,\n", " 'index': 117,\n", " 'word': '▁like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': '0',\n", " 'score': 0.9999589,\n", " 'index': 118,\n", " 'word': '▁an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': '0',\n", " 'score': 0.99981254,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': '0',\n", " 'score': 0.9998149,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': '0',\n", " 'score': 0.82426316,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': '0',\n", " 'score': 0.84575367,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': '0',\n", " 'score': 0.8702296,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': '0',\n", " 'score': 0.8167109,\n", " 'index': 124,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 
'0',\n", " 'score': 0.99995625,\n", " 'index': 125,\n", " 'word': '▁Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': '0',\n", " 'score': 0.99997604,\n", " 'index': 126,\n", " 'word': '▁few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': '0',\n", " 'score': 0.9999708,\n", " 'index': 127,\n", " 'word': '▁days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': '0',\n", " 'score': 0.8718785,\n", " 'index': 128,\n", " 'word': '▁later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': '0',\n", " 'score': 0.89001435,\n", " 'index': 129,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': '0',\n", " 'score': 0.9999827,\n", " 'index': 130,\n", " 'word': '▁we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': '0',\n", " 'score': 0.9999839,\n", " 'index': 131,\n", " 'word': '▁reveal',\n", " 'start': 538,\n", " 'end': 544},\n", " {'entity': '0',\n", " 'score': 0.99997723,\n", " 'index': 132,\n", " 'word': 'ed',\n", " 'start': 544,\n", " 'end': 546},\n", " {'entity': '0',\n", " 'score': 0.99999,\n", " 'index': 133,\n", " 'word': '▁the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': '0',\n", " 'score': 0.99427587,\n", " 'index': 134,\n", " 'word': '▁image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': '0',\n", " 'score': 0.9999856,\n", " 'index': 135,\n", " 'word': '▁for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': '0',\n", " 'score': 0.99998295,\n", " 'index': 136,\n", " 'word': '▁all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': '0',\n", " 'score': 0.9999893,\n", " 'index': 137,\n", " 'word': '▁to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': '0',\n", " 'score': 0.946914,\n", " 'index': 138,\n", " 'word': '▁see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': '0',\n", " 'score': 0.987288,\n", " 'index': 139,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': '0',\n", " 'score': 0.999188,\n", " 'index': 140,\n", " 'word': 
'▁and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': '0',\n", " 'score': 0.9999727,\n", " 'index': 141,\n", " 'word': '▁we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': '0',\n", " 'score': 0.99997914,\n", " 'index': 142,\n", " 'word': '▁made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': '0',\n", " 'score': 0.9997273,\n", " 'index': 143,\n", " 'word': '▁sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': '0',\n", " 'score': 0.9999852,\n", " 'index': 144,\n", " 'word': '▁to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': '0',\n", " 'score': 0.9989028,\n", " 'index': 145,\n", " 'word': '▁note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': '0',\n", " 'score': 0.99538165,\n", " 'index': 146,\n", " 'word': '▁that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': '0',\n", " 'score': 0.9999857,\n", " 'index': 147,\n", " 'word': '▁it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': '0',\n", " 'score': 0.9998722,\n", " 'index': 148,\n", " 'word': '▁was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': '0',\n", " 'score': 0.9999764,\n", " 'index': 149,\n", " 'word': '▁a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': '0',\n", " 'score': 0.99972886,\n", " 'index': 150,\n", " 'word': '▁huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': '0',\n", " 'score': 0.99993753,\n", " 'index': 151,\n", " 'word': '▁rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': '0',\n", " 'score': 0.98856163,\n", " 'index': 152,\n", " 'word': '▁formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': '0',\n", " 'score': 0.9999294,\n", " 'index': 153,\n", " 'word': '▁that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': '0',\n", " 'score': 0.99995327,\n", " 'index': 154,\n", " 'word': '▁just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': '0',\n", " 'score': 0.999966,\n", " 'index': 155,\n", " 'word': '▁rese',\n", " 'start': 642,\n", " 'end': 
646},\n", " {'entity': '0',\n", " 'score': 0.99995995,\n", " 'index': 156,\n", " 'word': 'mble',\n", " 'start': 646,\n", " 'end': 650},\n", " {'entity': '0',\n", " 'score': 0.99994195,\n", " 'index': 157,\n", " 'word': 'd',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': '0',\n", " 'score': 0.99997365,\n", " 'index': 158,\n", " 'word': '▁a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': '0',\n", " 'score': 0.9996898,\n", " 'index': 159,\n", " 'word': '▁human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': '0',\n", " 'score': 0.99964654,\n", " 'index': 160,\n", " 'word': '▁head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': '0',\n", " 'score': 0.9999701,\n", " 'index': 161,\n", " 'word': '▁and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': '0',\n", " 'score': 0.6061729,\n", " 'index': 162,\n", " 'word': '▁face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': '0',\n", " 'score': 0.9770811,\n", " 'index': 163,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': '0',\n", " 'score': 0.99860054,\n", " 'index': 164,\n", " 'word': '▁but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': '0',\n", " 'score': 0.999987,\n", " 'index': 165,\n", " 'word': '▁all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': '0',\n", " 'score': 0.99998677,\n", " 'index': 166,\n", " 'word': '▁of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': '0',\n", " 'score': 0.99996483,\n", " 'index': 167,\n", " 'word': '▁it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': '0',\n", " 'score': 0.9999796,\n", " 'index': 168,\n", " 'word': '▁was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': '0',\n", " 'score': 0.99998987,\n", " 'index': 169,\n", " 'word': '▁for',\n", " 'start': 693,\n", " 'end': 696},\n", " {'entity': '0',\n", " 'score': 0.99998856,\n", " 'index': 170,\n", " 'word': 'med',\n", " 'start': 696,\n", " 'end': 699},\n", " {'entity': '0',\n", " 'score': 0.99998856,\n", " 
'index': 171,\n", " 'word': '▁by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': '0',\n", " 'score': 0.98339117,\n", " 'index': 172,\n", " 'word': '▁shadow',\n", " 'start': 703,\n", " 'end': 709},\n", " {'entity': '0',\n", " 'score': 0.98900115,\n", " 'index': 173,\n", " 'word': 's',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': '0',\n", " 'score': 0.98375016,\n", " 'index': 174,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': '0',\n", " 'score': 0.9999788,\n", " 'index': 175,\n", " 'word': '▁We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': '0',\n", " 'score': 0.99996126,\n", " 'index': 176,\n", " 'word': '▁only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': '0',\n", " 'score': 0.9999857,\n", " 'index': 177,\n", " 'word': '▁announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': '0',\n", " 'score': 0.97736925,\n", " 'index': 178,\n", " 'word': '▁it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': '0',\n", " 'score': 0.99984276,\n", " 'index': 179,\n", " 'word': '▁because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': '0',\n", " 'score': 0.99998534,\n", " 'index': 180,\n", " 'word': '▁we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': '0',\n", " 'score': 0.9997471,\n", " 'index': 181,\n", " 'word': '▁thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': '0',\n", " 'score': 0.9999869,\n", " 'index': 182,\n", " 'word': '▁it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': '0',\n", " 'score': 0.9999846,\n", " 'index': 183,\n", " 'word': '▁would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': '0',\n", " 'score': 0.99996984,\n", " 'index': 184,\n", " 'word': '▁be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': '0',\n", " 'score': 0.99998915,\n", " 'index': 185,\n", " 'word': '▁a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': '0',\n", " 'score': 0.99998903,\n", " 'index': 186,\n", " 'word': '▁good',\n", " 
'start': 766,\n", " 'end': 770},\n", " {'entity': '0',\n", " 'score': 0.9998764,\n", " 'index': 187,\n", " 'word': '▁way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': '0',\n", " 'score': 0.99996376,\n", " 'index': 188,\n", " 'word': '▁to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': '0',\n", " 'score': 0.9999871,\n", " 'index': 189,\n", " 'word': '▁engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': '0',\n", " 'score': 0.99999,\n", " 'index': 190,\n", " 'word': '▁the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': '0',\n", " 'score': 0.9999553,\n", " 'index': 191,\n", " 'word': '▁public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': '0',\n", " 'score': 0.9999813,\n", " 'index': 192,\n", " 'word': '▁with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': '0',\n", " 'score': 0.9999882,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': '0',\n", " 'score': 0.9999893,\n", " 'index': 194,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': '0',\n", " 'score': 0.9999865,\n", " 'index': 195,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': '0',\n", " 'score': 0.99429613,\n", " 'index': 196,\n", " 'word': '▁finding',\n", " 'start': 808,\n", " 'end': 815},\n", " {'entity': '0',\n", " 'score': 0.9942463,\n", " 'index': 197,\n", " 'word': 's',\n", " 'start': 815,\n", " 'end': 816},\n", " {'entity': '0',\n", " 'score': 0.99662673,\n", " 'index': 198,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': '0',\n", " 'score': 0.9999397,\n", " 'index': 199,\n", " 'word': '▁and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': '0',\n", " 'score': 0.99998295,\n", " 'index': 200,\n", " 'word': '▁at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': '0',\n", " 'score': 0.9999753,\n", " 'index': 201,\n", " 'word': 'rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': '0',\n", " 
'score': 0.9999758,\n", " 'index': 202,\n", " 'word': 'ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': '0',\n", " 'score': 0.9999795,\n", " 'index': 203,\n", " 'word': '▁attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': '0',\n", " 'score': 0.99981695,\n", " 'index': 204,\n", " 'word': '▁to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': '0',\n", " 'score': 0.9528797,\n", " 'index': 205,\n", " 'word': '▁Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': '0',\n", " 'score': 0.9499835,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': '0',\n", " 'score': 0.96378785,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': '0',\n", " 'score': 0.9997296,\n", " 'index': 208,\n", " 'word': '▁and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': '0',\n", " 'score': 0.999982,\n", " 'index': 209,\n", " 'word': '▁it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': '0',\n", " 'score': 0.7720657,\n", " 'index': 210,\n", " 'word': '▁did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': '0',\n", " 'score': 0.9128729,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': '0',\n", " 'score': 0.99998784,\n", " 'index': 212,\n", " 'word': '▁The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': '0',\n", " 'score': 0.9999536,\n", " 'index': 213,\n", " 'word': '▁face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': '0',\n", " 'score': 0.99998736,\n", " 'index': 214,\n", " 'word': '▁on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': '0',\n", " 'score': 0.9995109,\n", " 'index': 215,\n", " 'word': '▁Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': '0',\n", " 'score': 0.9999553,\n", " 'index': 216,\n", " 'word': '▁soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': '0',\n", " 'score': 0.9997906,\n", " 'index': 217,\n", " 'word': 
'▁became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': '0',\n", " 'score': 0.9999752,\n", " 'index': 218,\n", " 'word': '▁a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': '0',\n", " 'score': 0.9999167,\n", " 'index': 219,\n", " 'word': '▁pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': '0',\n", " 'score': 0.9939778,\n", " 'index': 220,\n", " 'word': '▁icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': '0',\n", " 'score': 0.97592837,\n", " 'index': 221,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': '0',\n", " 'score': 0.9999682,\n", " 'index': 222,\n", " 'word': '▁shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': '0',\n", " 'score': 0.9999782,\n", " 'index': 223,\n", " 'word': '▁in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': '0',\n", " 'score': 0.95145255,\n", " 'index': 224,\n", " 'word': '▁movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': '0',\n", " 'score': 0.9942631,\n", " 'index': 225,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': '0',\n", " 'score': 0.9999585,\n", " 'index': 226,\n", " 'word': '▁appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': '0',\n", " 'score': 0.9999505,\n", " 'index': 227,\n", " 'word': '▁in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': '0',\n", " 'score': 0.99071115,\n", " 'index': 228,\n", " 'word': '▁books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': '0',\n", " 'score': 0.996298,\n", " 'index': 229,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': '0',\n", " 'score': 0.96334654,\n", " 'index': 230,\n", " 'word': '▁magazine',\n", " 'start': 939,\n", " 'end': 947},\n", " {'entity': '0',\n", " 'score': 0.96163946,\n", " 'index': 231,\n", " 'word': 's',\n", " 'start': 947,\n", " 'end': 948},\n", " {'entity': '0',\n", " 'score': 0.99563783,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " 
{'entity': '0',\n", " 'score': 0.96375704,\n", " 'index': 233,\n", " 'word': '▁radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': '0',\n", " 'score': 0.99991417,\n", " 'index': 234,\n", " 'word': '▁talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': '0',\n", " 'score': 0.9309042,\n", " 'index': 235,\n", " 'word': '▁shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': '0',\n", " 'score': 0.982239,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': '0',\n", " 'score': 0.99977416,\n", " 'index': 237,\n", " 'word': '▁and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': '0',\n", " 'score': 0.9998679,\n", " 'index': 238,\n", " 'word': '▁ha',\n", " 'start': 972,\n", " 'end': 974},\n", " {'entity': '0',\n", " 'score': 0.9998386,\n", " 'index': 239,\n", " 'word': 'un',\n", " 'start': 974,\n", " 'end': 976},\n", " {'entity': '0',\n", " 'score': 0.99986124,\n", " 'index': 240,\n", " 'word': 'ted',\n", " 'start': 976,\n", " 'end': 979},\n", " {'entity': '0',\n", " 'score': 0.9997329,\n", " 'index': 241,\n", " 'word': '▁gro',\n", " 'start': 980,\n", " 'end': 983},\n", " {'entity': '0',\n", " 'score': 0.99980766,\n", " 'index': 242,\n", " 'word': 'cer',\n", " 'start': 983,\n", " 'end': 986},\n", " {'entity': '0',\n", " 'score': 0.9998802,\n", " 'index': 243,\n", " 'word': 'y',\n", " 'start': 986,\n", " 'end': 987},\n", " {'entity': '0',\n", " 'score': 0.99945086,\n", " 'index': 244,\n", " 'word': '▁store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': '0',\n", " 'score': 0.99998605,\n", " 'index': 245,\n", " 'word': '▁check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': '0',\n", " 'score': 0.9999876,\n", " 'index': 246,\n", " 'word': 'out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': '0',\n", " 'score': 0.9878735,\n", " 'index': 247,\n", " 'word': '▁lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': '0',\n", " 'score': 0.999974,\n", " 'index': 
248,\n", " 'word': '▁for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': '0',\n", " 'score': 0.9999863,\n", " 'index': 249,\n", " 'word': '▁25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': '0',\n", " 'score': 0.9723751,\n", " 'index': 250,\n", " 'word': '▁years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': '0',\n", " 'score': 0.970573,\n", " 'index': 251,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': '0',\n", " 'score': 0.9999856,\n", " 'index': 252,\n", " 'word': '▁Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': '0',\n", " 'score': 0.9999589,\n", " 'index': 253,\n", " 'word': '▁people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': '0',\n", " 'score': 0.9989717,\n", " 'index': 254,\n", " 'word': '▁thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': '0',\n", " 'score': 0.9999881,\n", " 'index': 255,\n", " 'word': '▁the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': '0',\n", " 'score': 0.9999894,\n", " 'index': 256,\n", " 'word': '▁natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': '0',\n", " 'score': 0.9999033,\n", " 'index': 257,\n", " 'word': '▁land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': '0',\n", " 'score': 0.9999039,\n", " 'index': 258,\n", " 'word': 'form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': '0',\n", " 'score': 0.9999722,\n", " 'index': 259,\n", " 'word': '▁was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': '0',\n", " 'score': 0.9999889,\n", " 'index': 260,\n", " 'word': '▁evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': '0',\n", " 'score': 0.9999753,\n", " 'index': 261,\n", " 'word': '▁of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': '0',\n", " 'score': 0.9999536,\n", " 'index': 262,\n", " 'word': '▁life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': '0',\n", " 'score': 0.9999864,\n", " 'index': 263,\n", " 'word': 
'▁on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': '0',\n", " 'score': 0.96373415,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': '0',\n", " 'score': 0.9778991,\n", " 'index': 265,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': '0',\n", " 'score': 0.9999219,\n", " 'index': 266,\n", " 'word': '▁and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': '0',\n", " 'score': 0.9998994,\n", " 'index': 267,\n", " 'word': '▁that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': '0',\n", " 'score': 0.9998392,\n", " 'index': 268,\n", " 'word': '▁us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': '0',\n", " 'score': 0.9998784,\n", " 'index': 269,\n", " 'word': '▁scientist',\n", " 'start': 1106,\n", " 'end': 1115},\n", " {'entity': '0',\n", " 'score': 0.99990356,\n", " 'index': 270,\n", " 'word': 's',\n", " 'start': 1115,\n", " 'end': 1116},\n", " {'entity': '0',\n", " 'score': 0.9999876,\n", " 'index': 271,\n", " 'word': '▁wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': '0',\n", " 'score': 0.99998724,\n", " 'index': 272,\n", " 'word': '▁to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': '0',\n", " 'score': 0.99998665,\n", " 'index': 273,\n", " 'word': '▁hi',\n", " 'start': 1127,\n", " 'end': 1129},\n", " {'entity': '0',\n", " 'score': 0.9999869,\n", " 'index': 274,\n", " 'word': 'de',\n", " 'start': 1129,\n", " 'end': 1131},\n", " {'entity': '0',\n", " 'score': 0.6984844,\n", " 'index': 275,\n", " 'word': '▁it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': '0',\n", " 'score': 0.93957794,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': '0',\n", " 'score': 0.88728845,\n", " 'index': 277,\n", " 'word': '▁but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': '0',\n", " 'score': 0.8514652,\n", " 'index': 278,\n", " 'word': '▁really',\n", " 'start': 
1140,\n", " 'end': 1146},\n", " {'entity': '0',\n", " 'score': 0.9391215,\n", " 'index': 279,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': '0',\n", " 'score': 0.99998224,\n", " 'index': 280,\n", " 'word': '▁the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': '0',\n", " 'score': 0.9999875,\n", " 'index': 281,\n", " 'word': '▁defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': '0',\n", " 'score': 0.9999875,\n", " 'index': 282,\n", " 'word': 's',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': '0',\n", " 'score': 0.99997973,\n", " 'index': 283,\n", " 'word': '▁of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': '0',\n", " 'score': 0.9999875,\n", " 'index': 284,\n", " 'word': '▁the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': '0',\n", " 'score': 0.99969745,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': '0',\n", " 'score': 0.9937796,\n", " 'index': 286,\n", " 'word': '▁budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': '0',\n", " 'score': 0.9906232,\n", " 'index': 287,\n", " 'word': '▁wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': '0',\n", " 'score': 0.99996674,\n", " 'index': 288,\n", " 'word': '▁there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': '0',\n", " 'score': 0.99969625,\n", " 'index': 289,\n", " 'word': '▁was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': '0',\n", " 'score': 0.99998605,\n", " 'index': 290,\n", " 'word': '▁an',\n", " 'start': 1196,\n", " 'end': 1198},\n", " {'entity': '0',\n", " 'score': 0.99998045,\n", " 'index': 291,\n", " 'word': 'cient',\n", " 'start': 1198,\n", " 'end': 1203},\n", " {'entity': '0',\n", " 'score': 0.9999665,\n", " 'index': 292,\n", " 'word': '▁civiliza',\n", " 'start': 1204,\n", " 'end': 1212},\n", " {'entity': '0',\n", " 'score': 0.99996734,\n", " 'index': 293,\n", " 'word': 'tion',\n", " 'start': 1212,\n", " 
'end': 1216},\n", " {'entity': '0',\n", " 'score': 0.99998844,\n", " 'index': 294,\n", " 'word': '▁on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': '0',\n", " 'score': 0.9755779,\n", " 'index': 295,\n", " 'word': '▁Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': '0',\n", " 'score': 0.98278147,\n", " 'index': 296,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': '0',\n", " 'score': 0.9999839,\n", " 'index': 297,\n", " 'word': '▁We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': '0',\n", " 'score': 0.9999614,\n", " 'index': 298,\n", " 'word': '▁decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': '0',\n", " 'score': 0.99998903,\n", " 'index': 299,\n", " 'word': '▁to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': '0',\n", " 'score': 0.9999876,\n", " 'index': 300,\n", " 'word': '▁take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': '0',\n", " 'score': 0.9999888,\n", " 'index': 301,\n", " 'word': '▁another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': '0',\n", " 'score': 0.576694,\n", " 'index': 302,\n", " 'word': '▁shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': '0',\n", " 'score': 0.9998617,\n", " 'index': 303,\n", " 'word': '▁just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': '0',\n", " 'score': 0.999987,\n", " 'index': 304,\n", " 'word': '▁to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': '0',\n", " 'score': 0.99998534,\n", " 'index': 305,\n", " 'word': '▁make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': '0',\n", " 'score': 0.99988437,\n", " 'index': 306,\n", " 'word': '▁sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': '0',\n", " 'score': 0.9999877,\n", " 'index': 307,\n", " 'word': '▁we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': '0',\n", " 'score': 0.9999863,\n", " 'index': 308,\n", " 'word': '▁were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " 
{'entity': '0',\n", " 'score': 0.9999697,\n", " 'index': 309,\n", " 'word': 'n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': '0',\n", " 'score': 0.9999664,\n", " 'index': 310,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': '0',\n", " 'score': 0.99997973,\n", " 'index': 311,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': '0',\n", " 'score': 0.92863667,\n", " 'index': 312,\n", " 'word': '▁wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': '0',\n", " 'score': 0.9716512,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': '0',\n", " 'score': 0.99990296,\n", " 'index': 314,\n", " 'word': '▁on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': '0',\n", " 'score': 0.99997044,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': '0',\n", " 'score': 0.99988294,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': '0',\n", " 'score': 0.999974,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': '.',\n", " 'score': 0.95378405,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': '0',\n", " 'score': 0.9999124,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': '0',\n", " 'score': 0.98158044,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': '0',\n", " 'score': 0.99995947,\n", " 'index': 321,\n", " 'word': '▁and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': '0',\n", " 'score': 0.9999808,\n", " 'index': 322,\n", " 'word': '▁his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': '0',\n", " 'score': 0.99976593,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': '0',\n", " 
'score': 0.99988925,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': '0',\n", " 'score': 0.99988866,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': '0',\n", " 'score': 0.99990416,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': '0',\n", " 'score': 0.9998735,\n", " 'index': 327,\n", " 'word': '▁camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': '0',\n", " 'score': 0.9773008,\n", " 'index': 328,\n", " 'word': '▁team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': '0',\n", " 'score': 0.99995935,\n", " 'index': 329,\n", " 'word': '▁took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': '0',\n", " 'score': 0.99998975,\n", " 'index': 330,\n", " 'word': '▁a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': '0',\n", " 'score': 0.9995982,\n", " 'index': 331,\n", " 'word': '▁picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': '0',\n", " 'score': 0.9999869,\n", " 'index': 332,\n", " 'word': '▁that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': '0',\n", " 'score': 0.9999856,\n", " 'index': 333,\n", " 'word': '▁was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': '0',\n", " 'score': 0.99999,\n", " 'index': 334,\n", " 'word': '▁ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': '0',\n", " 'score': 0.9999894,\n", " 'index': 335,\n", " 'word': '▁times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': '0',\n", " 'score': 0.99997914,\n", " 'index': 336,\n", " 'word': '▁sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': '0',\n", " 'score': 0.99998355,\n", " 'index': 337,\n", " 'word': 'er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': '0',\n", " 'score': 0.9999882,\n", " 'index': 338,\n", " 'word': '▁than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': '0',\n", " 'score': 
0.99998236,\n", " 'index': 339,\n", " 'word': '▁the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': '0',\n", " 'score': 0.991712,\n", " 'index': 340,\n", " 'word': '▁original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': '0',\n", " 'score': 0.99961096,\n", " 'index': 341,\n", " 'word': '▁Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': '0',\n", " 'score': 0.9250188,\n", " 'index': 342,\n", " 'word': '▁photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': '0',\n", " 'score': 0.9903465,\n", " 'index': 343,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': '0',\n", " 'score': 0.9999404,\n", " 'index': 344,\n", " 'word': '▁reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': '0',\n", " 'score': 0.99993956,\n", " 'index': 345,\n", " 'word': 'ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': '0',\n", " 'score': 0.9999809,\n", " 'index': 346,\n", " 'word': '▁a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': '0',\n", " 'score': 0.99997795,\n", " 'index': 347,\n", " 'word': '▁natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': '0',\n", " 'score': 0.93572664,\n", " 'index': 348,\n", " 'word': '▁land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': '0',\n", " 'score': 0.94067293,\n", " 'index': 349,\n", " 'word': 'form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': '0',\n", " 'score': 0.9671829,\n", " 'index': 350,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': '0',\n", " 'score': 0.99992955,\n", " 'index': 351,\n", " 'word': '▁which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': '0',\n", " 'score': 0.9984102,\n", " 'index': 352,\n", " 'word': '▁meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': '0',\n", " 'score': 0.9998084,\n", " 'index': 353,\n", " 'word': '▁no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': '0',\n", " 'score': 0.99996555,\n", 
" 'index': 354,\n", " 'word': '▁alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': '0',\n", " 'score': 0.9784937,\n", " 'index': 355,\n", " 'word': '▁monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': '0',\n", " 'score': 0.9887007,\n", " 'index': 356,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': '0',\n", " 'score': 0.9980032,\n", " 'index': 357,\n", " 'word': '▁\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': '0',\n", " 'score': 0.99696094,\n", " 'index': 358,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': '0',\n", " 'score': 0.99998856,\n", " 'index': 359,\n", " 'word': '▁that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': '0',\n", " 'score': 0.9999746,\n", " 'index': 360,\n", " 'word': '▁picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': '0',\n", " 'score': 0.9999877,\n", " 'index': 361,\n", " 'word': '▁wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': '0',\n", " 'score': 0.9999833,\n", " 'index': 362,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': '0',\n", " 'score': 0.9999864,\n", " 'index': 363,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': '0',\n", " 'score': 0.9999876,\n", " 'index': 364,\n", " 'word': '▁very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': '0',\n", " 'score': 0.99985874,\n", " 'index': 365,\n", " 'word': '▁clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': '0',\n", " 'score': 0.9999877,\n", " 'index': 366,\n", " 'word': '▁at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': '0',\n", " 'score': 0.9274058,\n", " 'index': 367,\n", " 'word': '▁all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': '0',\n", " 'score': 0.88927543,\n", " 'index': 368,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': '0',\n", " 'score': 0.99990225,\n", " 'index': 369,\n", " 
'word': '▁which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': '0',\n", " 'score': 0.9999838,\n", " 'index': 370,\n", " 'word': '▁could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': '0',\n", " 'score': 0.9986493,\n", " 'index': 371,\n", " 'word': '▁mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': '0',\n", " 'score': 0.999985,\n", " 'index': 372,\n", " 'word': '▁alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': '0',\n", " 'score': 0.9999751,\n", " 'index': 373,\n", " 'word': '▁mark',\n", " 'start': 1562,\n", " 'end': 1566},\n", " {'entity': '0',\n", " 'score': 0.99997675,\n", " 'index': 374,\n", " 'word': 'ings',\n", " 'start': 1566,\n", " 'end': 1570},\n", " {'entity': '0',\n", " 'score': 0.9999702,\n", " 'index': 375,\n", " 'word': '▁were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': '0',\n", " 'score': 0.9994505,\n", " 'index': 376,\n", " 'word': '▁hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': '0',\n", " 'score': 0.99997675,\n", " 'index': 377,\n", " 'word': '▁by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': '.',\n", " 'score': 0.8143309,\n", " 'index': 378,\n", " 'word': '▁ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': '.',\n", " 'score': 0.7348569,\n", " 'index': 379,\n", " 'word': 'ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': '.',\n", " 'score': 0.90871817,\n", " 'index': 380,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': '0',\n", " 'score': 0.51734453,\n", " 'index': 381,\n", " 'word': '▁Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': ',',\n", " 'score': 0.55640894,\n", " 'index': 382,\n", " 'word': '▁no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': '0',\n", " 'score': 0.90493226,\n", " 'index': 383,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': ',',\n", " 'score': 0.7527332,\n", " 'index': 384,\n", " 'word': '▁yes',\n", " 
'start': 1601,\n", " 'end': 1604},\n", " {'entity': '0',\n", " 'score': 0.9999099,\n", " 'index': 385,\n", " 'word': '▁that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': '0',\n", " 'score': 0.9999504,\n", " 'index': 386,\n", " 'word': '▁rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': '0',\n", " 'score': 0.9237974,\n", " 'index': 387,\n", " 'word': '▁started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': '0',\n", " 'score': 0.9712035,\n", " 'index': 388,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': '0',\n", " 'score': 0.8615394,\n", " 'index': 389,\n", " 'word': '▁but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': '0',\n", " 'score': 0.99998593,\n", " 'index': 390,\n", " 'word': '▁to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': '0',\n", " 'score': 0.99998534,\n", " 'index': 391,\n", " 'word': '▁prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': '0',\n", " 'score': 0.99958175,\n", " 'index': 392,\n", " 'word': '▁them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': ',',\n", " 'score': 0.8349122,\n", " 'index': 393,\n", " 'word': '▁wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': '0',\n", " 'score': 0.9998111,\n", " 'index': 394,\n", " 'word': '▁on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': '0',\n", " 'score': 0.99996305,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': '0',\n", " 'score': 0.9999033,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': '0',\n", " 'score': 0.99996483,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': '0',\n", " 'score': 0.6733225,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': '0',\n", " 'score': 0.9999819,\n", " 'index': 399,\n", " 'word': '▁we',\n", " 'start': 1666,\n", " 
'end': 1668},\n", " {'entity': '0',\n", " 'score': 0.99993896,\n", " 'index': 400,\n", " 'word': '▁decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': '0',\n", " 'score': 0.9999876,\n", " 'index': 401,\n", " 'word': '▁to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': '0',\n", " 'score': 0.99998784,\n", " 'index': 402,\n", " 'word': '▁take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': '0',\n", " 'score': 0.9999888,\n", " 'index': 403,\n", " 'word': '▁another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': '0',\n", " 'score': 0.88930416,\n", " 'index': 404,\n", " 'word': '▁picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': '0',\n", " 'score': 0.9949751,\n", " 'index': 405,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': '0',\n", " 'score': 0.9999832,\n", " 'index': 406,\n", " 'word': '▁making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': '0',\n", " 'score': 0.99980956,\n", " 'index': 407,\n", " 'word': '▁sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': '0',\n", " 'score': 0.9999883,\n", " 'index': 408,\n", " 'word': '▁it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': '0',\n", " 'score': 0.9998956,\n", " 'index': 409,\n", " 'word': '▁was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': '0',\n", " 'score': 0.9999738,\n", " 'index': 410,\n", " 'word': '▁a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': '0',\n", " 'score': 0.9971227,\n", " 'index': 411,\n", " 'word': '▁cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': '0',\n", " 'score': 0.99745196,\n", " 'index': 412,\n", " 'word': 'less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': '0',\n", " 'score': 0.9999789,\n", " 'index': 413,\n", " 'word': '▁summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': '0',\n", " 'score': 0.9822667,\n", " 'index': 414,\n", " 'word': '▁day',\n", " 'start': 1740,\n", " 'end': 
1743},\n", " {'entity': '0',\n", " 'score': 0.983127,\n", " 'index': 415,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': '0',\n", " 'score': 0.99998796,\n", " 'index': 416,\n", " 'word': '▁Malin',\n", " 'start': 1745,\n", " 'end': 1750},\n", " {'entity': '0',\n", " 'score': 0.9999881,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': '0',\n", " 'score': 0.99998975,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': '0',\n", " 'score': 0.9999672,\n", " 'index': 419,\n", " 'word': '▁team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': '0',\n", " 'score': 0.99998546,\n", " 'index': 420,\n", " 'word': '▁capture',\n", " 'start': 1758,\n", " 'end': 1765},\n", " {'entity': '0',\n", " 'score': 0.99997926,\n", " 'index': 421,\n", " 'word': 'd',\n", " 'start': 1765,\n", " 'end': 1766},\n", " {'entity': '0',\n", " 'score': 0.9999893,\n", " 'index': 422,\n", " 'word': '▁an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': '0',\n", " 'score': 0.9999887,\n", " 'index': 423,\n", " 'word': '▁amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': '0',\n", " 'score': 0.8521725,\n", " 'index': 424,\n", " 'word': '▁photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': '0',\n", " 'score': 0.9999713,\n", " 'index': 425,\n", " 'word': '▁using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': '0',\n", " 'score': 0.99997926,\n", " 'index': 426,\n", " 'word': '▁the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': '0',\n", " 'score': 0.9999058,\n", " 'index': 427,\n", " 'word': '▁camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': '0',\n", " 'score': 0.99995863,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': '0',\n", " 'score': 0.9999323,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 
'0',\n", " 'score': 0.99984443,\n", " 'index': 430,\n", " 'word': '▁absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': '0',\n", " 'score': 0.9998752,\n", " 'index': 431,\n", " 'word': '▁maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': '0',\n", " 'score': 0.98818076,\n", " 'index': 432,\n", " 'word': '▁revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': '0',\n", " 'score': 0.9950777,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': '0',\n", " 'score': 0.99994016,\n", " 'index': 434,\n", " 'word': '▁With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': '0',\n", " 'score': 0.99968374,\n", " 'index': 435,\n", " 'word': '▁this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': '0',\n", " 'score': 0.542863,\n", " 'index': 436,\n", " 'word': '▁camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': '0',\n", " 'score': 0.99998796,\n", " 'index': 437,\n", " 'word': '▁you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': '0',\n", " 'score': 0.9999691,\n", " 'index': 438,\n", " 'word': '▁can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': '0',\n", " 'score': 0.99997675,\n", " 'index': 439,\n", " 'word': '▁discern',\n", " 'start': 1857,\n", " 'end': 1864},\n", " {'entity': '0',\n", " 'score': 0.9988336,\n", " 'index': 440,\n", " 'word': '▁things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': '0',\n", " 'score': 0.9999831,\n", " 'index': 441,\n", " 'word': '▁in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': '0',\n", " 'score': 0.9999877,\n", " 'index': 442,\n", " 'word': '▁a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': '0',\n", " 'score': 0.999985,\n", " 'index': 443,\n", " 'word': '▁digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': '0',\n", " 'score': 0.992401,\n", " 'index': 444,\n", " 'word': '▁image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 
'0',\n", " 'score': 0.9876018,\n", " 'index': 445,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': '0',\n", " 'score': 0.9998029,\n", " 'index': 446,\n", " 'word': '▁3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': '0',\n", " 'score': 0.9999795,\n", " 'index': 447,\n", " 'word': '▁times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': '0',\n", " 'score': 0.99997413,\n", " 'index': 448,\n", " 'word': '▁bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': '0',\n", " 'score': 0.999977,\n", " 'index': 449,\n", " 'word': '▁than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': '0',\n", " 'score': 0.9999838,\n", " 'index': 450,\n", " 'word': '▁the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': '0',\n", " 'score': 0.99995077,\n", " 'index': 451,\n", " 'word': '▁pixel',\n", " 'start': 1916,\n", " 'end': 1921},\n", " {'entity': ',',\n", " 'score': 0.844227,\n", " 'index': 452,\n", " 'word': '▁size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': '0',\n", " 'score': 0.9999434,\n", " 'index': 453,\n", " 'word': '▁which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': '0',\n", " 'score': 0.8158125,\n", " 'index': 454,\n", " 'word': '▁means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': '0',\n", " 'score': 0.9999814,\n", " 'index': 455,\n", " 'word': '▁if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': '0',\n", " 'score': 0.99998975,\n", " 'index': 456,\n", " 'word': '▁there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': '0',\n", " 'score': 0.9999827,\n", " 'index': 457,\n", " 'word': '▁were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': '0',\n", " 'score': 0.9999871,\n", " 'index': 458,\n", " 'word': '▁any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': '0',\n", " 'score': 0.9999901,\n", " 'index': 459,\n", " 'word': '▁sign',\n", " 'start': 1957,\n", " 'end': 1961},\n", " {'entity': '0',\n", " 'score': 
0.99998987,\n", " 'index': 460,\n", " 'word': 's',\n", " 'start': 1961,\n", " 'end': 1962},\n", " {'entity': '0',\n", " 'score': 0.99998915,\n", " 'index': 461,\n", " 'word': '▁of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': '0',\n", " 'score': 0.8491108,\n", " 'index': 462,\n", " 'word': '▁life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': '0',\n", " 'score': 0.99971646,\n", " 'index': 463,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': '0',\n", " 'score': 0.9999896,\n", " 'index': 464,\n", " 'word': '▁you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': '0',\n", " 'score': 0.9999862,\n", " 'index': 465,\n", " 'word': '▁could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': '0',\n", " 'score': 0.999987,\n", " 'index': 466,\n", " 'word': '▁easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': '0',\n", " 'score': 0.99995744,\n", " 'index': 467,\n", " 'word': '▁see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': '0',\n", " 'score': 0.99999,\n", " 'index': 468,\n", " 'word': '▁what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': '0',\n", " 'score': 0.9999901,\n", " 'index': 469,\n", " 'word': '▁they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': '0',\n", " 'score': 0.96247953,\n", " 'index': 470,\n", " 'word': '▁were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': '0',\n", " 'score': 0.99191755,\n", " 'index': 471,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': '0',\n", " 'score': 0.9999583,\n", " 'index': 472,\n", " 'word': '▁What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': '0',\n", " 'score': 0.99999034,\n", " 'index': 473,\n", " 'word': '▁the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': '0',\n", " 'score': 0.99998474,\n", " 'index': 474,\n", " 'word': '▁picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': '0',\n", " 'score': 0.99641967,\n", " 
'index': 475,\n", " 'word': '▁showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': '0',\n", " 'score': 0.9996567,\n", " 'index': 476,\n", " 'word': '▁was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': '0',\n", " 'score': 0.99991846,\n", " 'index': 477,\n", " 'word': '▁the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': '0',\n", " 'score': 0.7575091,\n", " 'index': 478,\n", " 'word': '▁but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': '0',\n", " 'score': 0.7503565,\n", " 'index': 479,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': '0',\n", " 'score': 0.998519,\n", " 'index': 480,\n", " 'word': '▁or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': '0',\n", " 'score': 0.9107988,\n", " 'index': 481,\n", " 'word': '▁mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': '0',\n", " 'score': 0.82752305,\n", " 'index': 482,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': '0',\n", " 'score': 0.99993956,\n", " 'index': 483,\n", " 'word': '▁which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': '0',\n", " 'score': 0.9999516,\n", " 'index': 484,\n", " 'word': '▁are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': '0',\n", " 'score': 0.99993145,\n", " 'index': 485,\n", " 'word': '▁land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': '0',\n", " 'score': 0.9999263,\n", " 'index': 486,\n", " 'word': 'form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': '0',\n", " 'score': 0.99993145,\n", " 'index': 487,\n", " 'word': 's',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': '0',\n", " 'score': 0.99996626,\n", " 'index': 488,\n", " 'word': '▁common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': '0',\n", " 'score': 0.9999869,\n", " 'index': 489,\n", " 'word': '▁around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': '0',\n", " 'score': 0.99999,\n", " 'index': 490,\n", " 'word': 
'▁the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': '0',\n", " 'score': 0.9999894,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': '0',\n", " 'score': 0.9730223,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': '0',\n", " 'score': 0.97686726,\n", " 'index': 493,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"mukowaty/punctuate-16\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"mukowaty/punctuate-16\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 68, "id": "8c60dc0e-210c-4c34-87f4-9d960dc810fa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", ", 5\n", ". 5\n", "0 483\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", ", ▁No 1\n", " ▁no 1\n", " ▁size 1\n", " ▁wrong 1\n", " ▁yes 1\n", ". 1\n", " ? 1\n", " ze 1\n", " ▁1998. 1\n", " ▁ha 1\n", "0 6\n", " , 34\n", " - 2\n", " . 
11\n", " ; 1\n", " But 1\n", " a 1\n", " an 1\n", " bit 1\n", " cer 1\n", " cient 1\n", " craft 1\n", " ct 1\n", " d 3\n", " de 1\n", " do 1\n", " ed 1\n", " er 2\n", " form 3\n", " ing 1\n", " ings 1\n", " ion 1\n", " less 1\n", " ling 1\n", " mble 1\n", " med 1\n", " n 1\n", " ness 1\n", " nia 1\n", " oh 1\n", " out 1\n", " pping 1\n", " re 1\n", " rra 1\n", " s 13\n", " t 2\n", " te 1\n", " ted 2\n", " tion 1\n", " un 1\n", " y 2\n", " ▁ 1\n", " ▁1 1\n", " ▁2001 1\n", " ▁25 1\n", " ▁3 1\n", " ▁5 1\n", " ▁8 1\n", " ▁American 1\n", " ▁April 2\n", " ▁Cy 1\n", " ▁Egypt 1\n", " ▁Face 1\n", " ▁Malin 2\n", " ▁Mars 7\n", " ▁Marti 1\n", " ▁Michael 1\n", " ▁NASA 3\n", " ▁On 1\n", " ▁Or 1\n", " ▁Phar 1\n", " ▁So 1\n", " ▁Some 1\n", " ▁The 1\n", " ▁Us 1\n", " ▁Very 1\n", " ▁Viking 2\n", " ▁We 2\n", " ▁Well 1\n", " ▁West 1\n", " ▁What 1\n", " ▁With 1\n", " ▁a 10\n", " ▁able 1\n", " ▁about 1\n", " ▁absolute 1\n", " ▁ago 1\n", " ▁alien 3\n", " ▁all 3\n", " ▁amazing 1\n", " ▁an 3\n", " ▁and 8\n", " ▁announced 1\n", " ▁another 3\n", " ▁any 1\n", " ▁appeared 1\n", " ▁are 1\n", " ▁around 2\n", " ▁at 2\n", " ▁attention 1\n", " ▁be 2\n", " ▁became 1\n", " ▁because 1\n", " ▁bigger 1\n", " ▁books 1\n", " ▁budget 1\n", " ▁but 4\n", " ▁by 3\n", " ▁camera 3\n", " ▁can 1\n", " ▁capture 1\n", " ▁check 1\n", " ▁circ 1\n", " ▁civiliza 1\n", " ▁clear 1\n", " ▁cloud 1\n", " ▁common 2\n", " ▁correct 1\n", " ▁could 2\n", " ▁created 1\n", " ▁day 1\n", " ▁days 1\n", " ▁decided 2\n", " ▁defender 1\n", " ▁did 1\n", " ▁digital 1\n", " ▁discern 1\n", " ▁easily 1\n", " ▁engage 1\n", " ▁evidence 2\n", " ▁face 4\n", " ▁few 1\n", " ▁figure 1\n", " ▁finding 1\n", " ▁five 1\n", " ▁for 3\n", " ▁formation 1\n", " ▁good 1\n", " ▁gro 1\n", " ▁ha 1\n", " ▁had 1\n", " ▁head 1\n", " ▁hi 1\n", " ▁hidden 1\n", " ▁his 1\n", " ▁huge 1\n", " ▁human 2\n", " ▁icon 1\n", " ▁if 2\n", " ▁image 2\n", " ▁in 3\n", " ▁is 2\n", " ▁it 10\n", " ▁just 3\n", " ▁land 3\n", " ▁later 1\n", " ▁life 3\n", " ▁like 2\n", " ▁lines 1\n", " 
▁look 1\n", " ▁made 2\n", " ▁magazine 1\n", " ▁make 1\n", " ▁making 1\n", " ▁mark 1\n", " ▁maximum 1\n", " ▁me 1\n", " ▁mean 1\n", " ▁means 1\n", " ▁meant 1\n", " ▁mesa 2\n", " ▁monument 1\n", " ▁movies 1\n", " ▁natural 2\n", " ▁no 1\n", " ▁note 1\n", " ▁obviously 1\n", " ▁of 5\n", " ▁on 6\n", " ▁one 1\n", " ▁only 2\n", " ▁or 1\n", " ▁original 1\n", " ▁our 1\n", " ▁out 1\n", " ▁people 1\n", " ▁photo 1\n", " ▁photos 2\n", " ▁picture 4\n", " ▁pixel 1\n", " ▁planet 1\n", " ▁pop 1\n", " ▁prove 1\n", " ▁public 1\n", " ▁radio 1\n", " ▁really 1\n", " ▁rese 1\n", " ▁reveal 2\n", " ▁revolution 1\n", " ▁rock 1\n", " ▁rumor 1\n", " ▁scientist 3\n", " ▁see 2\n", " ▁shadow 3\n", " ▁sharp 1\n", " ▁shot 2\n", " ▁should 1\n", " ▁showed 1\n", " ▁shows 1\n", " ▁sign 1\n", " ▁sna 1\n", " ▁soon 1\n", " ▁space 1\n", " ▁spot 1\n", " ▁started 1\n", " ▁store 1\n", " ▁story 1\n", " ▁summer 1\n", " ▁sure 3\n", " ▁take 2\n", " ▁talk 1\n", " ▁team 2\n", " ▁tell 1\n", " ▁ten 1\n", " ▁than 2\n", " ▁that 10\n", " ▁the 16\n", " ▁them 1\n", " ▁there 3\n", " ▁they 1\n", " ▁things 1\n", " ▁this 2\n", " ▁thought 2\n", " ▁times 2\n", " ▁to 10\n", " ▁took 1\n", " ▁twenty 1\n", " ▁us 1\n", " ▁using 1\n", " ▁very 1\n", " ▁wanted 1\n", " ▁was 10\n", " ▁wasn 1\n", " ▁way 1\n", " ▁we 5\n", " ▁were 4\n", " ▁what 1\n", " ▁when 1\n", " ▁which 5\n", " ▁whole 1\n", " ▁wish 1\n", " ▁with 1\n", " ▁would 1\n", " ▁wrong 1\n", " ▁years 2\n", " ▁you 4\n", "dtype: int64" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "with open(\"34 mukowatypunctuate-16.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "6e67c25d-82fc-494d-95c6-3812228d0d7d", "metadata": {}, "source": [ "## 35 HiTZ/mbert-argmining-abstrct-multilingual" ] }, { "cell_type": "code", "execution_count": 70, "id": 
"6e8a7262-9e25-4363-bae7-49d477b96175", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"HiTZ/mbert-argmining-abstrct-multilingual\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"HiTZ/mbert-argmining-abstrct-multilingual\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "4a12a382-1d1b-440c-b968-d9b307d1b391", "metadata": {}, "source": [ "## 36 benjamin/wtp-canine-s-12l" ] }, { "cell_type": "code", "execution_count": 72, "id": "462a5e9d-56f2-449e-b80c-28e56d8f4000", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[72], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 
2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-12l\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) 
\u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-12l\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-12l\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "b0794928-e326-4f21-9a11-4d9cfa04a373", "metadata": {}, "source": [ "## 37 benjamin/wtp-canine-s-3l-no-adapters" ] }, { "cell_type": "code", "execution_count": 74, "id": "e57f841c-e383-430c-8afd-a954e4bd2483", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[74], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 
2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-3l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-3l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." ] } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"benjamin/wtp-canine-s-3l-no-adapters\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"benjamin/wtp-canine-s-3l-no-adapters\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "72e8fe70-ec5a-43dc-b4a2-aead0b235f75", "metadata": {}, "source": [ "## 38 benjamin/wtp-canine-s-9l-no-adapters" ] }, { "cell_type": "code", "execution_count": 77, "id": "c63de375-015d-4f70-a9f4-bdfa18a6a3cf", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. 
This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:951\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 950\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:653\u001b[0m, in \u001b[0;36m_LazyConfigMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping:\n\u001b[1;32m--> 653\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[0;32m 654\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mapping[key]\n", "\u001b[1;31mKeyError\u001b[0m: 'la-canine'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[77], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 
2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbenjamin/wtp-canine-s-9l-no-adapters\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:953\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 951\u001b[0m config_class \u001b[38;5;241m=\u001b[39m CONFIG_MAPPING[config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[0;32m 952\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m--> 953\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 954\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe checkpoint you are trying to load has model type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut Transformers does not recognize this architecture. 
This could be because of an \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124missue with the checkpoint, or because your version of Transformers is out of date.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 957\u001b[0m )\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m config_class\u001b[38;5;241m.\u001b[39mfrom_dict(config_dict, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39munused_kwargs)\n\u001b[0;32m 959\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 960\u001b[0m \u001b[38;5;66;03m# Fallback: use pattern matching on the string.\u001b[39;00m\n\u001b[0;32m 961\u001b[0m \u001b[38;5;66;03m# We go from longer names to shorter names to catch roberta before bert (for instance)\u001b[39;00m\n", "\u001b[1;31mValueError\u001b[0m: The checkpoint you are trying to load has model type `la-canine` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date." 
] } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Checkpoint under test, named once so the tokenizer and the model are always loaded from the same repo.\n", "checkpoint_id = \"benjamin/wtp-canine-s-9l-no-adapters\"\n", "\n", "# Reset before loading: if the load fails, `ner_results` must not keep the previous model's output.\n", "ner_results = []\n", "try:\n", "    tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)\n", "    model = AutoModelForTokenClassification.from_pretrained(checkpoint_id)\n", "except ValueError as load_error:\n", "    # Raised when the checkpoint's model type (`la-canine` for this repo) is not registered\n", "    # in the installed transformers; report it instead of aborting Restart & Run All.\n", "    print(f\"Skipping {checkpoint_id}: {load_error}\")\n", "else:\n", "    nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "    ner_results = nlp(text)\n", "\n", "ner_results" ] }, { "cell_type": "markdown", "id": "16b75f15-739f-45a4-8211-b094fd2b9104", "metadata": {}, "source": [ "## 39 msislam/code-mixed-language-detection-XLMRoberta" ] }, { "cell_type": "code", "execution_count": 79, "id": "e72aafcc-9c45-47c4-bd61-bf0941e523c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'I-EN',\n", " 'score': 0.9999919,\n", " 'index': 1,\n", " 'word': '▁So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 3,\n", " 'word': '▁if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 4,\n", " 'word': '▁you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 7,\n", " 'word': '▁a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start':
30,\n", " 'end': 31},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 11,\n", " 'word': '▁you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 12,\n", " 'word': '▁should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 13,\n", " 'word': '▁be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 14,\n", " 'word': '▁able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 15,\n", " 'word': '▁to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 16,\n", " 'word': '▁tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 17,\n", " 'word': '▁me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 18,\n", " 'word': '▁the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 19,\n", " 'word': '▁whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 20,\n", " 'word': '▁story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 21,\n", " 'word': '▁about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 22,\n", " 'word': '▁the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 
'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 27,\n", " 'word': '▁which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 28,\n", " 'word': '▁obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999962,\n", " 'index': 29,\n", " 'word': '▁is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 30,\n", " 'word': '▁evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 31,\n", " 'word': '▁that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 32,\n", " 'word': '▁there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999607,\n", " 'index': 33,\n", " 'word': '▁is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 34,\n", " 'word': '▁life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 35,\n", " 'word': '▁on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 36,\n", " 'word': '▁Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 38,\n", " 'word': '▁and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 39,\n", " 'word': '▁that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 40,\n", " 'word': '▁the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'I-EN',\n", " 
'score': 0.99999595,\n", " 'index': 41,\n", " 'word': '▁face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 42,\n", " 'word': '▁was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 43,\n", " 'word': '▁created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 44,\n", " 'word': '▁by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 45,\n", " 'word': '▁alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 46,\n", " 'word': 's',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999962,\n", " 'index': 47,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 48,\n", " 'word': '▁correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999927,\n", " 'index': 49,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999285,\n", " 'index': 50,\n", " 'word': '▁No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999933,\n", " 'index': 52,\n", " 'word': '▁twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 53,\n", " 'word': '▁five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 54,\n", " 'word': '▁years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 55,\n", " 'word': '▁ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'I-EN',\n", " 'score': 
0.9999956,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 57,\n", " 'word': '▁our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 62,\n", " 'word': '▁was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 63,\n", " 'word': '▁circ',\n", " 'start': 264,\n", " 'end': 268},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 64,\n", " 'word': 'ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 65,\n", " 'word': '▁the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 66,\n", " 'word': '▁planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 67,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 68,\n", " 'word': '▁sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 69,\n", " 'word': 'pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 70,\n", " 'word': '▁photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 
'index': 71,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 72,\n", " 'word': '▁when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 73,\n", " 'word': '▁it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 74,\n", " 'word': '▁spot',\n", " 'start': 310,\n", " 'end': 314},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 75,\n", " 'word': 'ted',\n", " 'start': 314,\n", " 'end': 317},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 76,\n", " 'word': '▁the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 77,\n", " 'word': '▁shadow',\n", " 'start': 322,\n", " 'end': 328},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 78,\n", " 'word': 'y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 79,\n", " 'word': '▁like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 80,\n", " 'word': 'ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 81,\n", " 'word': '▁of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 82,\n", " 'word': '▁a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 83,\n", " 'word': '▁human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 84,\n", " 'word': '▁face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 85,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999607,\n", " 'index': 86,\n", " 'word': 
'▁Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 87,\n", " 'word': '▁scientist',\n", " 'start': 359,\n", " 'end': 368},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999962,\n", " 'index': 88,\n", " 'word': 's',\n", " 'start': 368,\n", " 'end': 369},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 89,\n", " 'word': '▁figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 90,\n", " 'word': 'd',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 91,\n", " 'word': '▁out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 92,\n", " 'word': '▁that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 93,\n", " 'word': '▁it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 94,\n", " 'word': '▁was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 95,\n", " 'word': '▁just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 96,\n", " 'word': '▁another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 99,\n", " 'word': '▁mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 100,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 101,\n", " 'word': '▁common',\n", " 'start': 
421,\n", " 'end': 427},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 102,\n", " 'word': '▁around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 106,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 107,\n", " 'word': '▁only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 108,\n", " 'word': '▁this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 109,\n", " 'word': '▁one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 110,\n", " 'word': '▁had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 111,\n", " 'word': '▁shadow',\n", " 'start': 462,\n", " 'end': 468},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 112,\n", " 'word': 's',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 113,\n", " 'word': '▁that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 114,\n", " 'word': '▁made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 115,\n", " 'word': '▁it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 116,\n", " 'word': '▁look',\n", " 'start': 483,\n", " 
'end': 487},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 117,\n", " 'word': '▁like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999962,\n", " 'index': 118,\n", " 'word': '▁an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999963,\n", " 'index': 124,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 125,\n", " 'word': '▁Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 126,\n", " 'word': '▁few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 127,\n", " 'word': '▁days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 128,\n", " 'word': '▁later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 129,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999607,\n", " 'index': 130,\n", " 'word': '▁we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 131,\n", " 'word': '▁reveal',\n", " 'start': 538,\n", " 'end': 
544},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 132,\n", " 'word': 'ed',\n", " 'start': 544,\n", " 'end': 546},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 133,\n", " 'word': '▁the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 134,\n", " 'word': '▁image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 135,\n", " 'word': '▁for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 136,\n", " 'word': '▁all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 137,\n", " 'word': '▁to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 138,\n", " 'word': '▁see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 139,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 140,\n", " 'word': '▁and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 141,\n", " 'word': '▁we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 142,\n", " 'word': '▁made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 143,\n", " 'word': '▁sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 144,\n", " 'word': '▁to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 145,\n", " 'word': '▁note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 146,\n", " 'word': '▁that',\n", " 'start': 598,\n", " 'end': 602},\n", " 
{'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 147,\n", " 'word': '▁it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 148,\n", " 'word': '▁was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 149,\n", " 'word': '▁a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 150,\n", " 'word': '▁huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 151,\n", " 'word': '▁rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 152,\n", " 'word': '▁formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 153,\n", " 'word': '▁that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 154,\n", " 'word': '▁just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 155,\n", " 'word': '▁rese',\n", " 'start': 642,\n", " 'end': 646},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 156,\n", " 'word': 'mble',\n", " 'start': 646,\n", " 'end': 650},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 157,\n", " 'word': 'd',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 158,\n", " 'word': '▁a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 159,\n", " 'word': '▁human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 160,\n", " 'word': '▁head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 161,\n", " 'word': '▁and',\n", " 'start': 665,\n", " 'end': 668},\n", " 
{'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 162,\n", " 'word': '▁face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999932,\n", " 'index': 163,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 164,\n", " 'word': '▁but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 165,\n", " 'word': '▁all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 166,\n", " 'word': '▁of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 167,\n", " 'word': '▁it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 168,\n", " 'word': '▁was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 169,\n", " 'word': '▁for',\n", " 'start': 693,\n", " 'end': 696},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 170,\n", " 'word': 'med',\n", " 'start': 696,\n", " 'end': 699},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 171,\n", " 'word': '▁by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 172,\n", " 'word': '▁shadow',\n", " 'start': 703,\n", " 'end': 709},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 173,\n", " 'word': 's',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 174,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 175,\n", " 'word': '▁We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 176,\n", " 'word': '▁only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'I-EN',\n", " 
'score': 0.9999963,\n", " 'index': 177,\n", " 'word': '▁announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 178,\n", " 'word': '▁it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 179,\n", " 'word': '▁because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 180,\n", " 'word': '▁we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 181,\n", " 'word': '▁thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 182,\n", " 'word': '▁it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 183,\n", " 'word': '▁would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 184,\n", " 'word': '▁be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 185,\n", " 'word': '▁a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 186,\n", " 'word': '▁good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 187,\n", " 'word': '▁way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 188,\n", " 'word': '▁to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 189,\n", " 'word': '▁engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 190,\n", " 'word': '▁the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 191,\n", " 'word': '▁public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'I-EN',\n", " 
'score': 0.9999957,\n", " 'index': 192,\n", " 'word': '▁with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 194,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 195,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 196,\n", " 'word': '▁finding',\n", " 'start': 808,\n", " 'end': 815},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 197,\n", " 'word': 's',\n", " 'start': 815,\n", " 'end': 816},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 198,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999962,\n", " 'index': 199,\n", " 'word': '▁and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 200,\n", " 'word': '▁at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 201,\n", " 'word': 'rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 202,\n", " 'word': 'ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 203,\n", " 'word': '▁attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999595,\n", " 'index': 204,\n", " 'word': '▁to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999607,\n", " 'index': 205,\n", " 'word': '▁Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999964,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'I-EN',\n", " 'score': 
0.9999963,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999963,\n", " 'index': 208,\n", " 'word': '▁and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 209,\n", " 'word': '▁it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 210,\n", " 'word': '▁did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999913,\n", " 'index': 212,\n", " 'word': '▁The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 213,\n", " 'word': '▁face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 214,\n", " 'word': '▁on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 215,\n", " 'word': '▁Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 216,\n", " 'word': '▁soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 217,\n", " 'word': '▁became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 218,\n", " 'word': '▁a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 219,\n", " 'word': '▁pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 220,\n", " 'word': '▁icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 221,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 
'index': 222,\n", " 'word': '▁shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 223,\n", " 'word': '▁in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 224,\n", " 'word': '▁movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999927,\n", " 'index': 225,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 226,\n", " 'word': '▁appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 227,\n", " 'word': '▁in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 228,\n", " 'word': '▁books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999908,\n", " 'index': 229,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999344,\n", " 'index': 230,\n", " 'word': '▁magazine',\n", " 'start': 939,\n", " 'end': 947},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999933,\n", " 'index': 231,\n", " 'word': 's',\n", " 'start': 947,\n", " 'end': 948},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999908,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 233,\n", " 'word': '▁radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 234,\n", " 'word': '▁talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 235,\n", " 'word': '▁shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'I-EN',\n", " 'score': 0.999987,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 
237,\n", " 'word': '▁and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 238,\n", " 'word': '▁ha',\n", " 'start': 972,\n", " 'end': 974},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 239,\n", " 'word': 'un',\n", " 'start': 974,\n", " 'end': 976},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 240,\n", " 'word': 'ted',\n", " 'start': 976,\n", " 'end': 979},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 241,\n", " 'word': '▁gro',\n", " 'start': 980,\n", " 'end': 983},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 242,\n", " 'word': 'cer',\n", " 'start': 983,\n", " 'end': 986},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999285,\n", " 'index': 243,\n", " 'word': 'y',\n", " 'start': 986,\n", " 'end': 987},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 244,\n", " 'word': '▁store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 245,\n", " 'word': '▁check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999932,\n", " 'index': 246,\n", " 'word': 'out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 247,\n", " 'word': '▁lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 248,\n", " 'word': '▁for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 249,\n", " 'word': '▁25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 250,\n", " 'word': '▁years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 251,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 252,\n", " 
'word': '▁Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 253,\n", " 'word': '▁people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 254,\n", " 'word': '▁thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999344,\n", " 'index': 255,\n", " 'word': '▁the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 256,\n", " 'word': '▁natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 257,\n", " 'word': '▁land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 258,\n", " 'word': 'form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 259,\n", " 'word': '▁was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 260,\n", " 'word': '▁evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 261,\n", " 'word': '▁of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 262,\n", " 'word': '▁life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 263,\n", " 'word': '▁on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999933,\n", " 'index': 265,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 266,\n", " 'word': '▁and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'I-EN',\n", " 'score': 
0.99999404,\n", " 'index': 267,\n", " 'word': '▁that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 268,\n", " 'word': '▁us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 269,\n", " 'word': '▁scientist',\n", " 'start': 1106,\n", " 'end': 1115},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 270,\n", " 'word': 's',\n", " 'start': 1115,\n", " 'end': 1116},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 271,\n", " 'word': '▁wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 272,\n", " 'word': '▁to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 273,\n", " 'word': '▁hi',\n", " 'start': 1127,\n", " 'end': 1129},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 274,\n", " 'word': 'de',\n", " 'start': 1129,\n", " 'end': 1131},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 275,\n", " 'word': '▁it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 277,\n", " 'word': '▁but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 278,\n", " 'word': '▁really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 279,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999932,\n", " 'index': 280,\n", " 'word': '▁the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 281,\n", " 'word': '▁defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 
'I-EN',\n", " 'score': 0.99999297,\n", " 'index': 282,\n", " 'word': 's',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999285,\n", " 'index': 283,\n", " 'word': '▁of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999912,\n", " 'index': 284,\n", " 'word': '▁the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 286,\n", " 'word': '▁budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 287,\n", " 'word': '▁wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 288,\n", " 'word': '▁there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999907,\n", " 'index': 289,\n", " 'word': '▁was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999213,\n", " 'index': 290,\n", " 'word': '▁an',\n", " 'start': 1196,\n", " 'end': 1198},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 291,\n", " 'word': 'cient',\n", " 'start': 1198,\n", " 'end': 1203},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 292,\n", " 'word': '▁civiliza',\n", " 'start': 1204,\n", " 'end': 1212},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999924,\n", " 'index': 293,\n", " 'word': 'tion',\n", " 'start': 1212,\n", " 'end': 1216},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 294,\n", " 'word': '▁on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 295,\n", " 'word': '▁Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 296,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 
1225},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 297,\n", " 'word': '▁We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 298,\n", " 'word': '▁decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999924,\n", " 'index': 299,\n", " 'word': '▁to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 300,\n", " 'word': '▁take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 301,\n", " 'word': '▁another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 302,\n", " 'word': '▁shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999344,\n", " 'index': 303,\n", " 'word': '▁just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 304,\n", " 'word': '▁to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 305,\n", " 'word': '▁make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 306,\n", " 'word': '▁sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 307,\n", " 'word': '▁we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 308,\n", " 'word': '▁were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 309,\n", " 'word': 'n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 310,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 311,\n", " 'word': 't',\n", " 'start': 
1285,\n", " 'end': 1286},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999297,\n", " 'index': 312,\n", " 'word': '▁wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 314,\n", " 'word': '▁on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999285,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999875,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999225,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 321,\n", " 'word': '▁and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 322,\n", " 'word': '▁his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999926,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 326,\n", " 'word': 
'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 327,\n", " 'word': '▁camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 328,\n", " 'word': '▁team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 329,\n", " 'word': '▁took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 330,\n", " 'word': '▁a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 331,\n", " 'word': '▁picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 332,\n", " 'word': '▁that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 333,\n", " 'word': '▁was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 334,\n", " 'word': '▁ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 335,\n", " 'word': '▁times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 336,\n", " 'word': '▁sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 337,\n", " 'word': 'er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 338,\n", " 'word': '▁than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 339,\n", " 'word': '▁the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 340,\n", " 'word': '▁original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 
'index': 341,\n", " 'word': '▁Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 342,\n", " 'word': '▁photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999285,\n", " 'index': 343,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 344,\n", " 'word': '▁reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 345,\n", " 'word': 'ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 346,\n", " 'word': '▁a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 347,\n", " 'word': '▁natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 348,\n", " 'word': '▁land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 349,\n", " 'word': 'form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999925,\n", " 'index': 350,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 351,\n", " 'word': '▁which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 352,\n", " 'word': '▁meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 353,\n", " 'word': '▁no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 354,\n", " 'word': '▁alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 355,\n", " 'word': '▁monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 
'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 356,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999344,\n", " 'index': 357,\n", " 'word': '▁\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 358,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 359,\n", " 'word': '▁that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 360,\n", " 'word': '▁picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 361,\n", " 'word': '▁wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 362,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 363,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 364,\n", " 'word': '▁very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 365,\n", " 'word': '▁clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 366,\n", " 'word': '▁at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 367,\n", " 'word': '▁all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 368,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 369,\n", " 'word': '▁which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 370,\n", " 'word': '▁could',\n", " 'start': 1545,\n", " 'end': 
1550},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 371,\n", " 'word': '▁mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 372,\n", " 'word': '▁alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 373,\n", " 'word': '▁mark',\n", " 'start': 1562,\n", " 'end': 1566},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 374,\n", " 'word': 'ings',\n", " 'start': 1566,\n", " 'end': 1570},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 375,\n", " 'word': '▁were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 376,\n", " 'word': '▁hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 377,\n", " 'word': '▁by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 378,\n", " 'word': '▁ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 379,\n", " 'word': 'ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 380,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 381,\n", " 'word': '▁Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 382,\n", " 'word': '▁no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999958,\n", " 'index': 383,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 384,\n", " 'word': '▁yes',\n", " 'start': 1601,\n", " 'end': 1604},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 385,\n", " 'word': '▁that',\n", " 'start': 
1605,\n", " 'end': 1609},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 386,\n", " 'word': '▁rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 387,\n", " 'word': '▁started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 388,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 389,\n", " 'word': '▁but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 390,\n", " 'word': '▁to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 391,\n", " 'word': '▁prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 392,\n", " 'word': '▁them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 393,\n", " 'word': '▁wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 394,\n", " 'word': '▁on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 399,\n", " 'word': '▁we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 400,\n", " 'word': 
'▁decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 401,\n", " 'word': '▁to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 402,\n", " 'word': '▁take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 403,\n", " 'word': '▁another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999535,\n", " 'index': 404,\n", " 'word': '▁picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 405,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 406,\n", " 'word': '▁making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 407,\n", " 'word': '▁sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 408,\n", " 'word': '▁it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 409,\n", " 'word': '▁was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 410,\n", " 'word': '▁a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 411,\n", " 'word': '▁cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 412,\n", " 'word': 'less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 413,\n", " 'word': '▁summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 414,\n", " 'word': '▁day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'I-EN',\n", " 'score': 
0.99999404,\n", " 'index': 415,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999913,\n", " 'index': 416,\n", " 'word': '▁Malin',\n", " 'start': 1745,\n", " 'end': 1750},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 419,\n", " 'word': '▁team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 420,\n", " 'word': '▁capture',\n", " 'start': 1758,\n", " 'end': 1765},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 421,\n", " 'word': 'd',\n", " 'start': 1765,\n", " 'end': 1766},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 422,\n", " 'word': '▁an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 423,\n", " 'word': '▁amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 424,\n", " 'word': '▁photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 425,\n", " 'word': '▁using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 426,\n", " 'word': '▁the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 427,\n", " 'word': '▁camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 
'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 430,\n", " 'word': '▁absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 431,\n", " 'word': '▁maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 432,\n", " 'word': '▁revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 434,\n", " 'word': '▁With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 435,\n", " 'word': '▁this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 'index': 436,\n", " 'word': '▁camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 437,\n", " 'word': '▁you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 438,\n", " 'word': '▁can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999933,\n", " 'index': 439,\n", " 'word': '▁discern',\n", " 'start': 1857,\n", " 'end': 1864},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 440,\n", " 'word': '▁things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 441,\n", " 'word': '▁in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 442,\n", " 'word': '▁a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 443,\n", " 'word': '▁digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 444,\n", " 'word': '▁image',\n", " 
'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 445,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 446,\n", " 'word': '▁3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999416,\n", " 'index': 447,\n", " 'word': '▁times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 448,\n", " 'word': '▁bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 449,\n", " 'word': '▁than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999356,\n", " 'index': 450,\n", " 'word': '▁the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'I-EN',\n", " 'score': 0.999992,\n", " 'index': 451,\n", " 'word': '▁pixel',\n", " 'start': 1916,\n", " 'end': 1921},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999927,\n", " 'index': 452,\n", " 'word': '▁size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 453,\n", " 'word': '▁which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 454,\n", " 'word': '▁means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 455,\n", " 'word': '▁if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'I-EN',\n", " 'score': 0.999992,\n", " 'index': 456,\n", " 'word': '▁there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999931,\n", " 'index': 457,\n", " 'word': '▁were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999938,\n", " 'index': 458,\n", " 'word': '▁any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999927,\n", " 'index': 459,\n", " 
'word': '▁sign',\n", " 'start': 1957,\n", " 'end': 1961},\n", " {'entity': 'I-EN',\n", " 'score': 0.999992,\n", " 'index': 460,\n", " 'word': 's',\n", " 'start': 1961,\n", " 'end': 1962},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999926,\n", " 'index': 461,\n", " 'word': '▁of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999404,\n", " 'index': 462,\n", " 'word': '▁life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999106,\n", " 'index': 463,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999943,\n", " 'index': 464,\n", " 'word': '▁you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 465,\n", " 'word': '▁could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 466,\n", " 'word': '▁easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999939,\n", " 'index': 467,\n", " 'word': '▁see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 468,\n", " 'word': '▁what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999944,\n", " 'index': 469,\n", " 'word': '▁they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 470,\n", " 'word': '▁were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 471,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 472,\n", " 'word': '▁What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 473,\n", " 'word': '▁the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999475,\n", " 
'index': 474,\n", " 'word': '▁picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 475,\n", " 'word': '▁showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 476,\n", " 'word': '▁was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 477,\n", " 'word': '▁the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999464,\n", " 'index': 478,\n", " 'word': '▁but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 479,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 480,\n", " 'word': '▁or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999937,\n", " 'index': 481,\n", " 'word': '▁mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999949,\n", " 'index': 482,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999957,\n", " 'index': 483,\n", " 'word': '▁which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999607,\n", " 'index': 484,\n", " 'word': '▁are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 485,\n", " 'word': '▁land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'I-EN',\n", " 'score': 0.999995,\n", " 'index': 486,\n", " 'word': 'form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999547,\n", " 'index': 487,\n", " 'word': 's',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999945,\n", " 'index': 488,\n", " 'word': '▁common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'I-EN',\n", " 'score': 
0.99999464,\n", " 'index': 489,\n", " 'word': '▁around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 490,\n", " 'word': '▁the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999956,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-EN',\n", " 'score': 0.99999523,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': 'I-EN',\n", " 'score': 0.9999951,\n", " 'index': 493,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Keep the checkpoint id in one place so the tokenizer and the model always load from the same repo.\n", "MODEL_ID = \"msislam/code-mixed-language-detection-XLMRoberta\"\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n", "model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Run the pipeline on `text` (defined in an earlier cell); the returned list of prediction dicts is the cell result.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 131, "id": "f6603b67-95f1-4da1-9cff-da6f138c57fa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "I-EN 493\n", "dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entityscore
entity
I-EN4930.999995
\n", "
" ], "text/plain": [ " entity score\n", "entity \n", "I-EN 493 0.999995" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file named after model 39 (presumably its saved pipeline output - confirm) and build a DataFrame with obtener_dataframe.\n", "with open(\"39 msislamcode-mixed-language-detection-XLMRoberta.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Rows per entity label.\n", "print(aux.groupby(['entity']).size())\n", "# Rows per (entity, word) pair. Only the last expression of a cell is rendered, so this one needs an explicit display().\n", "display(aux.groupby(['entity', 'word']).size())\n", "\n", "# Row count and mean score per entity label (cell result).\n", "aux.groupby(['entity']) \\\n", "    .agg({'entity':'size', 'score':'mean'})" ] }, { "cell_type": "markdown", "id": "2c588a1c-faa2-4955-bc83-9bdf2bdb3692", "metadata": {}, "source": [ "## 40 DunnBC22/bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual" ] }, { "cell_type": "code", "execution_count": 82, "id": "21949283-c47a-4c93-8965-98ce4b11d09a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.98604935,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'I-MISC',\n", " 'score': 0.67684585,\n", " 'index': 23,\n", " 'word': 'Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'I-MISC',\n", " 'score': 0.6898482,\n", " 'index': 24,\n", " 'word': 'On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'I-MISC',\n", " 'score': 0.72657937,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'B-LOC',\n", " 'score': 0.8695399,\n", " 'index': 37,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'I-MISC',\n", " 'score': 0.87388104,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'I-MISC',\n", " 'score': 0.9686202,\n", " 'index': 61,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'I-MISC',\n", " 'score': 0.6234842,\n", " 'index': 98,\n", " 'word': 'Mart',\n", " 'start': 407,\n", " 'end': 411},\n", " {'entity': 'I-MISC',\n", " 'score': 0.7816169,\n", " 'index': 99,\n", " 'word':
'##ian',\n", " 'start': 411,\n", " 'end': 414},\n", " {'entity': 'B-LOC',\n", " 'score': 0.91626126,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'I-LOC',\n", " 'score': 0.8611923,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'I-LOC',\n", " 'score': 0.80380625,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-LOC',\n", " 'score': 0.4532804,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'I-MISC',\n", " 'score': 0.64347696,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-MISC',\n", " 'score': 0.48230314,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'I-MISC',\n", " 'score': 0.4377054,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'I-MISC',\n", " 'score': 0.49611342,\n", " 'index': 125,\n", " 'word': '##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9510295,\n", " 'index': 194,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'B-LOC',\n", " 'score': 0.8271464,\n", " 'index': 205,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'B-LOC',\n", " 'score': 0.8054947,\n", " 'index': 215,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'B-LOC',\n", " 'score': 0.865675,\n", " 'index': 263,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'B-ORG',\n", " 'score': 0.98267186,\n", " 'index': 282,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'B-LOC',\n", " 'score': 0.70751286,\n", " 'index': 291,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'B-PER',\n", " 'score': 0.99715245,\n", " 'index': 
316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.9968267,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PER',\n", " 'score': 0.99570113,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'I-MISC',\n", " 'score': 0.83701926,\n", " 'index': 321,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'I-MISC',\n", " 'score': 0.8589732,\n", " 'index': 322,\n", " 'word': 'Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'I-MISC',\n", " 'score': 0.91832304,\n", " 'index': 323,\n", " 'word': '##biter',\n", " 'start': 1341,\n", " 'end': 1346},\n", " {'entity': 'I-MISC',\n", " 'score': 0.86819035,\n", " 'index': 338,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'B-PER',\n", " 'score': 0.9447502,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'I-PER',\n", " 'score': 0.95286584,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'B-LOC',\n", " 'score': 0.6390979,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.63282007,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Keep the checkpoint id in one place so the tokenizer and the model always load from the same repo.\n", "MODEL_ID = \"DunnBC22/bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual\"\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n", "model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Run the pipeline on `text` (defined in an earlier cell); the returned list of prediction dicts is the cell result.\n", "ner_results
= nlp(text)\n", "ner_results\n", "\n" ] }, { "cell_type": "code", "execution_count": 83, "id": "1cbc0df4-5b76-4aef-9ca4-915b70274014", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 8\n", "B-ORG 3\n", "B-PER 2\n", "I-LOC 3\n", "I-MISC 15\n", "I-PER 3\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC American 1\n", " C 1\n", " Egypt 1\n", " Mars 5\n", "B-ORG NASA 3\n", "B-PER Mali 1\n", " Michael 1\n", "I-LOC ##onia 1\n", " ##yd 1\n", " West 1\n", "I-MISC ##ara 1\n", " ##biter 1\n", " ##ian 1\n", " ##ion 1\n", " ##oh 1\n", " 1 1\n", " Face 1\n", " Mars 2\n", " Mart 1\n", " On 1\n", " Or 1\n", " Ph 1\n", " Viking 2\n", "I-PER ##n 2\n", " Mali 1\n", "dtype: int64" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file named after model 40 (presumably its saved pipeline output - confirm) and build a DataFrame with obtener_dataframe.\n", "with open(\"40 DunnBC22bert-base-multilingual-cased-fine_tuned-ner-WikiNeural_Multilingual.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Rows per entity label (printed), then rows per (entity, word) pair (cell result).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "fd1d4edf-44f2-4d33-aaef-74e2351995ae", "metadata": {}, "source": [ "## 41 rollerhafeezh-amikom/xlm-roberta-base-ner-silvanus " ] }, { "cell_type": "code", "execution_count": 85, "id": "31b2cf11-39a1-43cf-9679-76eaef9b5428", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-LOC',\n", " 'score': 0.9711031,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'I-LOC',\n", " 'score': 0.97973377,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'I-LOC',\n", " 'score': 0.98118365,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'B-DAT',\n", " 'score': 0.99709177,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", "
{'entity': 'I-DAT',\n", " 'score': 0.9621371,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'I-DAT',\n", " 'score': 0.9620826,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'I-DAT',\n", " 'score': 0.98092973,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': 'B-DAT',\n", " 'score': 0.9965664,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'I-DAT',\n", " 'score': 0.9623601,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'I-DAT',\n", " 'score': 0.9603569,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'I-DAT',\n", " 'score': 0.9859262,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9732382,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.6832874,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Keep the checkpoint id in one place so the tokenizer and the model always load from the same repo.\n", "MODEL_ID = \"rollerhafeezh-amikom/xlm-roberta-base-ner-silvanus\"\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n", "model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Run the pipeline on `text` (defined in an earlier cell); the returned list of prediction dicts is the cell result.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 86, "id": "e7cc7649-0234-4fb3-9802-e5909f30a3b1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-DAT 2\n", "B-LOC 2\n", "I-DAT 6\n", "I-LOC 3\n",
"dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-DAT ▁April 2\n", "B-LOC ▁American 1\n", " ▁Cy 1\n", "I-DAT , 2\n", " ▁1998. 1\n", " ▁2001 1\n", " ▁5 1\n", " ▁8 1\n", "I-LOC do 1\n", " nia 1\n", " ▁West 1\n", "dtype: int64" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file named after model 41 (presumably its saved pipeline output - confirm) and build a DataFrame with obtener_dataframe.\n", "with open(\"41 rollerhafeezh-amikomxlm-roberta-base-ner-silvanus.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Rows per entity label (printed), then rows per (entity, word) pair (cell result).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "dff1d357-0650-4946-8703-0fe86143eb17", "metadata": {}, "source": [ "## 42 orgcatorg/distilbert-base-multilingual-cased-ner" ] }, { "cell_type": "code", "execution_count": 88, "id": "b5293a8a-c29f-4090-9db2-b7f92b4842f5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length.
Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'B-LOC',\n", " 'score': 0.9980641,\n", " 'index': 104,\n", " 'word': 'C',\n", " 'start': 435,\n", " 'end': 436},\n", " {'entity': 'B-LOC',\n", " 'score': 0.99705327,\n", " 'index': 105,\n", " 'word': '##yd',\n", " 'start': 436,\n", " 'end': 438},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9987865,\n", " 'index': 106,\n", " 'word': '##onia',\n", " 'start': 438,\n", " 'end': 442},\n", " {'entity': 'B-PER',\n", " 'score': 0.66370535,\n", " 'index': 121,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'B-PER',\n", " 'score': 0.6953692,\n", " 'index': 122,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'I-PER',\n", " 'score': 0.6620473,\n", " 'index': 123,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'I-PER',\n", " 'score': 0.7284523,\n", " 'index': 124,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'I-PER',\n", " 'score': 0.704528,\n", " 'index': 125,\n", " 'word': '##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'B-PER',\n", " 'score': 0.9904669,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.9859772,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'I-PER',\n", " 'score': 0.9845413,\n", " 'index': 318,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'B-LOC',\n", " 'score': 0.6711369,\n", " 'index': 492,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.49512243,\n", " 'index': 493,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers 
import pipeline\n", "\n", "# Keep the checkpoint id in one place so the tokenizer and the model always load from the same repo.\n", "MODEL_ID = \"orgcatorg/distilbert-base-multilingual-cased-ner\"\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n", "model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Run the pipeline on `text` (defined in an earlier cell); the returned list of prediction dicts is the cell result.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 89, "id": "63543f53-7959-4cae-9090-926897ead7f5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 4\n", "B-PER 3\n", "I-LOC 1\n", "I-PER 5\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC ##onia 1\n", " ##yd 1\n", " American 1\n", " C 1\n", "B-PER ##ion 1\n", " Egypt 1\n", " Michael 1\n", "I-LOC West 1\n", "I-PER ##ara 1\n", " ##n 1\n", " ##oh 1\n", " Mali 1\n", " Ph 1\n", "dtype: int64" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file named after model 42 (presumably its saved pipeline output - confirm) and build a DataFrame with obtener_dataframe.\n", "with open(\"42 orgcatorgdistilbert-base-multilingual-cased-ner.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Rows per entity label (printed), then rows per (entity, word) pair (cell result).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "846da5e7-bd48-4d24-9c48-4da51f22fadb", "metadata": {}, "source": [ "## 43 orgcatorg/xlm-v-base-ner" ] }, { "cell_type": "code", "execution_count": 92, "id": "94d18491-a366-4197-b0c5-de86e41ee576", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length.
Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'B-ORG',\n", " 'score': 0.8529484,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 15,\n", " 'end': 20},\n", " {'entity': 'B-ORG',\n", " 'score': 0.67219996,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 87,\n", " 'end': 92},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9593816,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 92,\n", " 'end': 95},\n", " {'entity': 'I-ORG',\n", " 'score': 0.98047435,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 95,\n", " 'end': 100},\n", " {'entity': 'B-ORG',\n", " 'score': 0.97809476,\n", " 'index': 57,\n", " 'word': '▁Viking',\n", " 'start': 239,\n", " 'end': 246},\n", " {'entity': 'I-ORG',\n", " 'score': 0.97545755,\n", " 'index': 58,\n", " 'word': '▁1',\n", " 'start': 246,\n", " 'end': 248},\n", " {'entity': 'I-ORG',\n", " 'score': 0.5685065,\n", " 'index': 59,\n", " 'word': '▁space',\n", " 'start': 248,\n", " 'end': 254},\n", " {'entity': 'B-LOC',\n", " 'score': 0.7317715,\n", " 'index': 93,\n", " 'word': '▁Marti',\n", " 'start': 406,\n", " 'end': 412},\n", " {'entity': 'B-LOC',\n", " 'score': 0.77504426,\n", " 'index': 94,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'B-LOC',\n", " 'score': 0.73297995,\n", " 'index': 99,\n", " 'word': '▁Cy',\n", " 'start': 434,\n", " 'end': 437},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9430252,\n", " 'index': 113,\n", " 'word': '▁Egypti',\n", " 'start': 495,\n", " 'end': 502},\n", " {'entity': 'B-LOC',\n", " 'score': 0.9459621,\n", " 'index': 114,\n", " 'word': 'on',\n", " 'start': 502,\n", " 'end': 504},\n", " {'entity': 'I-LOC',\n", " 'score': 0.98246944,\n", " 'index': 115,\n", " 'word': '▁Pharao',\n", " 'start': 504,\n", " 'end': 511},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9633557,\n", " 'index': 116,\n", " 'word': 'h',\n", " 'start': 511,\n", " 'end': 512},\n", " {'entity': 'B-PER',\n", " 'score': 0.9796036,\n", " 'index': 294,\n", " 'word': 
'▁Michael',\n", " 'start': 1311,\n", " 'end': 1319},\n", " {'entity': 'I-PER',\n", " 'score': 0.98091847,\n", " 'index': 295,\n", " 'word': '▁Malin',\n", " 'start': 1319,\n", " 'end': 1325},\n", " {'entity': 'B-ORG',\n", " 'score': 0.9358236,\n", " 'index': 298,\n", " 'word': '▁Mars',\n", " 'start': 1333,\n", " 'end': 1338},\n", " {'entity': 'I-ORG',\n", " 'score': 0.90482205,\n", " 'index': 299,\n", " 'word': '▁Orbit',\n", " 'start': 1338,\n", " 'end': 1344},\n", " {'entity': 'I-ORG',\n", " 'score': 0.70797783,\n", " 'index': 300,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'B-LOC',\n", " 'score': 0.73849326,\n", " 'index': 448,\n", " 'word': '▁but',\n", " 'start': 2040,\n", " 'end': 2044},\n", " {'entity': 'B-LOC',\n", " 'score': 0.6988277,\n", " 'index': 449,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'B-LOC',\n", " 'score': 0.5565135,\n", " 'index': 460,\n", " 'word': '▁American',\n", " 'start': 2093,\n", " 'end': 2102}]" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Keep the checkpoint id in one place so the tokenizer and the model always load from the same repo.\n", "MODEL_ID = \"orgcatorg/xlm-v-base-ner\"\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n", "model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Run the pipeline on `text` (defined in an earlier cell); the returned list of prediction dicts is the cell result.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 93, "id": "da937121-ff84-4ee6-9cdc-a624047999ea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-LOC 8\n", "B-ORG 4\n", "B-PER 1\n", "I-LOC 2\n", "I-ORG 6\n", "I-PER 1\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-LOC an 1\n", " on 1\n", " te 1\n", " ▁American 1\n", " ▁Cy 1\n", " ▁Egypti 1\n", " ▁Marti 1\n", " ▁but 1\n", "B-ORG ▁Face 1\n", " ▁Mars 1\n", " ▁NASA 1\n", "
▁Viking 1\n", "B-PER ▁Michael 1\n", "I-LOC h 1\n", " ▁Pharao 1\n", "I-ORG er 1\n", " ▁1 1\n", " ▁Mars 1\n", " ▁On 1\n", " ▁Orbit 1\n", " ▁space 1\n", "I-PER ▁Malin 1\n", "dtype: int64" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file named after model 43 (presumably its saved pipeline output - confirm) and build a DataFrame with obtener_dataframe.\n", "with open(\"43 orgcatorgxlm-v-base-ner.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n", "\n", "# Rows per entity label (printed), then rows per (entity, word) pair (cell result).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "46e2733d-93b9-447b-91d4-efae62f4d2b6", "metadata": {}, "source": [ "## 44 dejanseo/LinkBERT-XL" ] }, { "cell_type": "code", "execution_count": 95, "id": "7c2c315e-cf4c-453e-afcf-816049cfc16a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'LABEL_0',\n", " 'score': 0.99891233,\n", " 'index': 1,\n", " 'word': '▁So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99892116,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979603,\n", " 'index': 3,\n", " 'word': '▁if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99819654,\n", " 'index': 4,\n", " 'word': '▁you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984597,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99839216,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984572,\n", " 'index': 7,\n", " 'word': '▁a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9959991,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99812514,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'LABEL_0',\n", "
'score': 0.9979564,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98711306,\n", " 'index': 11,\n", " 'word': '▁you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9878263,\n", " 'index': 12,\n", " 'word': '▁should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98597556,\n", " 'index': 13,\n", " 'word': '▁be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9839212,\n", " 'index': 14,\n", " 'word': '▁able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9830526,\n", " 'index': 15,\n", " 'word': '▁to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9660563,\n", " 'index': 16,\n", " 'word': '▁tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9594903,\n", " 'index': 17,\n", " 'word': '▁me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9053577,\n", " 'index': 18,\n", " 'word': '▁the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.84808695,\n", " 'index': 19,\n", " 'word': '▁whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.87426686,\n", " 'index': 20,\n", " 'word': '▁story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9940403,\n", " 'index': 21,\n", " 'word': '▁about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9932741,\n", " 'index': 22,\n", " 'word': '▁the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9718788,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98829424,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.9920151,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99279845,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976579,\n", " 'index': 27,\n", " 'word': '▁which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982932,\n", " 'index': 28,\n", " 'word': '▁obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99760216,\n", " 'index': 29,\n", " 'word': '▁is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9973246,\n", " 'index': 30,\n", " 'word': '▁evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99918765,\n", " 'index': 31,\n", " 'word': '▁that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99898845,\n", " 'index': 32,\n", " 'word': '▁there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990656,\n", " 'index': 33,\n", " 'word': '▁is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96821344,\n", " 'index': 34,\n", " 'word': '▁life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991037,\n", " 'index': 35,\n", " 'word': '▁on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991229,\n", " 'index': 36,\n", " 'word': '▁Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988174,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993888,\n", " 'index': 38,\n", " 'word': '▁and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99929714,\n", " 'index': 39,\n", " 'word': '▁that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.9986438,\n", " 'index': 40,\n", " 'word': '▁the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99628323,\n", " 'index': 41,\n", " 'word': '▁face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99901855,\n", " 'index': 42,\n", " 'word': '▁was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981996,\n", " 'index': 43,\n", " 'word': '▁created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987431,\n", " 'index': 44,\n", " 'word': '▁by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988205,\n", " 'index': 45,\n", " 'word': '▁alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99885654,\n", " 'index': 46,\n", " 'word': 's',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992561,\n", " 'index': 47,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99904126,\n", " 'index': 48,\n", " 'word': '▁correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9989654,\n", " 'index': 49,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99859565,\n", " 'index': 50,\n", " 'word': '▁No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99823654,\n", " 'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979513,\n", " 'index': 52,\n", " 'word': '▁twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984366,\n", " 'index': 53,\n", " 'word': '▁five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99829,\n", " 'index': 54,\n", " 'word': '▁years',\n", " 'start': 225,\n", " 'end': 
230},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984238,\n", " 'index': 55,\n", " 'word': '▁ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984384,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98689705,\n", " 'index': 57,\n", " 'word': '▁our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8483806,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8561226,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96529603,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9623912,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976211,\n", " 'index': 62,\n", " 'word': '▁was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99899536,\n", " 'index': 63,\n", " 'word': '▁circ',\n", " 'start': 264,\n", " 'end': 268},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99855524,\n", " 'index': 64,\n", " 'word': 'ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99938333,\n", " 'index': 65,\n", " 'word': '▁the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993606,\n", " 'index': 66,\n", " 'word': '▁planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99926203,\n", " 'index': 67,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99685955,\n", " 'index': 68,\n", " 'word': '▁sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99678254,\n", " 'index': 69,\n", " 'word': 'pping',\n", " 
'start': 288,\n", " 'end': 293},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9975854,\n", " 'index': 70,\n", " 'word': '▁photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9972172,\n", " 'index': 71,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9966794,\n", " 'index': 72,\n", " 'word': '▁when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99142355,\n", " 'index': 73,\n", " 'word': '▁it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9804128,\n", " 'index': 74,\n", " 'word': '▁spot',\n", " 'start': 310,\n", " 'end': 314},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97927594,\n", " 'index': 75,\n", " 'word': 'ted',\n", " 'start': 314,\n", " 'end': 317},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9854144,\n", " 'index': 76,\n", " 'word': '▁the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98293096,\n", " 'index': 77,\n", " 'word': '▁shadow',\n", " 'start': 322,\n", " 'end': 328},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9837083,\n", " 'index': 78,\n", " 'word': 'y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9853166,\n", " 'index': 79,\n", " 'word': '▁like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9849769,\n", " 'index': 80,\n", " 'word': 'ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99710613,\n", " 'index': 81,\n", " 'word': '▁of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99454665,\n", " 'index': 82,\n", " 'word': '▁a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9947602,\n", " 'index': 83,\n", " 'word': '▁human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9950589,\n", " 'index': 84,\n", " 
'word': '▁face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99469876,\n", " 'index': 85,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98264503,\n", " 'index': 86,\n", " 'word': '▁Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9652982,\n", " 'index': 87,\n", " 'word': '▁scientist',\n", " 'start': 359,\n", " 'end': 368},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9688392,\n", " 'index': 88,\n", " 'word': 's',\n", " 'start': 368,\n", " 'end': 369},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.93723047,\n", " 'index': 89,\n", " 'word': '▁figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9343609,\n", " 'index': 90,\n", " 'word': 'd',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9389645,\n", " 'index': 91,\n", " 'word': '▁out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976173,\n", " 'index': 92,\n", " 'word': '▁that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985746,\n", " 'index': 93,\n", " 'word': '▁it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990897,\n", " 'index': 94,\n", " 'word': '▁was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986003,\n", " 'index': 95,\n", " 'word': '▁just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99798214,\n", " 'index': 96,\n", " 'word': '▁another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982463,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976306,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99826413,\n", 
" 'index': 99,\n", " 'word': '▁mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986713,\n", " 'index': 100,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990752,\n", " 'index': 101,\n", " 'word': '▁common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991449,\n", " 'index': 102,\n", " 'word': '▁around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987953,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988625,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99890935,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983901,\n", " 'index': 106,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98953724,\n", " 'index': 107,\n", " 'word': '▁only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97681797,\n", " 'index': 108,\n", " 'word': '▁this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9744693,\n", " 'index': 109,\n", " 'word': '▁one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98362905,\n", " 'index': 110,\n", " 'word': '▁had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9713618,\n", " 'index': 111,\n", " 'word': '▁shadow',\n", " 'start': 462,\n", " 'end': 468},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9737309,\n", " 'index': 112,\n", " 'word': 's',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9887904,\n", " 'index': 113,\n", " 'word': '▁that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.9857012,\n", " 'index': 114,\n", " 'word': '▁made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98496616,\n", " 'index': 115,\n", " 'word': '▁it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9844059,\n", " 'index': 116,\n", " 'word': '▁look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98615867,\n", " 'index': 117,\n", " 'word': '▁like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98830485,\n", " 'index': 118,\n", " 'word': '▁an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9811522,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9806471,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9806395,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9844712,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9841697,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9892211,\n", " 'index': 124,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986187,\n", " 'index': 125,\n", " 'word': '▁Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99793774,\n", " 'index': 126,\n", " 'word': '▁few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99664587,\n", " 'index': 127,\n", " 'word': '▁days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99783033,\n", " 'index': 128,\n", " 'word': '▁later',\n", " 'start': 528,\n", " 
'end': 533},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982863,\n", " 'index': 129,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.94166446,\n", " 'index': 130,\n", " 'word': '▁we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.85345703,\n", " 'index': 131,\n", " 'word': '▁reveal',\n", " 'start': 538,\n", " 'end': 544},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8374294,\n", " 'index': 132,\n", " 'word': 'ed',\n", " 'start': 544,\n", " 'end': 546},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8623249,\n", " 'index': 133,\n", " 'word': '▁the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8417424,\n", " 'index': 134,\n", " 'word': '▁image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98973304,\n", " 'index': 135,\n", " 'word': '▁for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9759,\n", " 'index': 136,\n", " 'word': '▁all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99520445,\n", " 'index': 137,\n", " 'word': '▁to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99369144,\n", " 'index': 138,\n", " 'word': '▁see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99321544,\n", " 'index': 139,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991154,\n", " 'index': 140,\n", " 'word': '▁and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9958181,\n", " 'index': 141,\n", " 'word': '▁we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99414885,\n", " 'index': 142,\n", " 'word': '▁made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9957287,\n", " 'index': 143,\n", " 'word': 
'▁sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9959401,\n", " 'index': 144,\n", " 'word': '▁to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9936752,\n", " 'index': 145,\n", " 'word': '▁note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.998691,\n", " 'index': 146,\n", " 'word': '▁that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9956762,\n", " 'index': 147,\n", " 'word': '▁it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99795747,\n", " 'index': 148,\n", " 'word': '▁was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9948554,\n", " 'index': 149,\n", " 'word': '▁a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.992023,\n", " 'index': 150,\n", " 'word': '▁huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97803485,\n", " 'index': 151,\n", " 'word': '▁rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9896576,\n", " 'index': 152,\n", " 'word': '▁formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999468,\n", " 'index': 153,\n", " 'word': '▁that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987307,\n", " 'index': 154,\n", " 'word': '▁just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9978719,\n", " 'index': 155,\n", " 'word': '▁rese',\n", " 'start': 642,\n", " 'end': 646},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99729806,\n", " 'index': 156,\n", " 'word': 'mble',\n", " 'start': 646,\n", " 'end': 650},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9971686,\n", " 'index': 157,\n", " 'word': 'd',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.99888545,\n", " 'index': 158,\n", " 'word': '▁a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983923,\n", " 'index': 159,\n", " 'word': '▁human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9969125,\n", " 'index': 160,\n", " 'word': '▁head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99962246,\n", " 'index': 161,\n", " 'word': '▁and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99936336,\n", " 'index': 162,\n", " 'word': '▁face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99953985,\n", " 'index': 163,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995252,\n", " 'index': 164,\n", " 'word': '▁but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984201,\n", " 'index': 165,\n", " 'word': '▁all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990183,\n", " 'index': 166,\n", " 'word': '▁of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9980836,\n", " 'index': 167,\n", " 'word': '▁it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99912924,\n", " 'index': 168,\n", " 'word': '▁was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986325,\n", " 'index': 169,\n", " 'word': '▁for',\n", " 'start': 693,\n", " 'end': 696},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99902165,\n", " 'index': 170,\n", " 'word': 'med',\n", " 'start': 696,\n", " 'end': 699},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99891686,\n", " 'index': 171,\n", " 'word': '▁by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992654,\n", " 'index': 172,\n", " 'word': '▁shadow',\n", " 'start': 703,\n", " 'end': 709},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.99873906,\n", " 'index': 173,\n", " 'word': 's',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986755,\n", " 'index': 174,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9940135,\n", " 'index': 175,\n", " 'word': '▁We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9937384,\n", " 'index': 176,\n", " 'word': '▁only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98741966,\n", " 'index': 177,\n", " 'word': '▁announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9905053,\n", " 'index': 178,\n", " 'word': '▁it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99872977,\n", " 'index': 179,\n", " 'word': '▁because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99781847,\n", " 'index': 180,\n", " 'word': '▁we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99807006,\n", " 'index': 181,\n", " 'word': '▁thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99812585,\n", " 'index': 182,\n", " 'word': '▁it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987987,\n", " 'index': 183,\n", " 'word': '▁would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99814045,\n", " 'index': 184,\n", " 'word': '▁be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9968272,\n", " 'index': 185,\n", " 'word': '▁a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9960741,\n", " 'index': 186,\n", " 'word': '▁good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9928383,\n", " 'index': 187,\n", " 'word': '▁way',\n", " 
'start': 771,\n", " 'end': 774},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99515605,\n", " 'index': 188,\n", " 'word': '▁to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9906125,\n", " 'index': 189,\n", " 'word': '▁engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9925929,\n", " 'index': 190,\n", " 'word': '▁the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99023867,\n", " 'index': 191,\n", " 'word': '▁public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99890065,\n", " 'index': 192,\n", " 'word': '▁with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9960693,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986105,\n", " 'index': 194,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988933,\n", " 'index': 195,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991429,\n", " 'index': 196,\n", " 'word': '▁finding',\n", " 'start': 808,\n", " 'end': 815},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99926883,\n", " 'index': 197,\n", " 'word': 's',\n", " 'start': 815,\n", " 'end': 816},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99931073,\n", " 'index': 198,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99953973,\n", " 'index': 199,\n", " 'word': '▁and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99747664,\n", " 'index': 200,\n", " 'word': '▁at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9958331,\n", " 'index': 201,\n", " 'word': 'rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99711335,\n", " 'index': 
202,\n", " 'word': 'ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99565244,\n", " 'index': 203,\n", " 'word': '▁attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99948764,\n", " 'index': 204,\n", " 'word': '▁to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992853,\n", " 'index': 205,\n", " 'word': '▁Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996164,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99963045,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995757,\n", " 'index': 208,\n", " 'word': '▁and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984617,\n", " 'index': 209,\n", " 'word': '▁it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9978162,\n", " 'index': 210,\n", " 'word': '▁did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99782467,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9902649,\n", " 'index': 212,\n", " 'word': '▁The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9821962,\n", " 'index': 213,\n", " 'word': '▁face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98453516,\n", " 'index': 214,\n", " 'word': '▁on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98241407,\n", " 'index': 215,\n", " 'word': '▁Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981958,\n", " 'index': 216,\n", " 'word': '▁soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'LABEL_0',\n", " 
'score': 0.9987692,\n", " 'index': 217,\n", " 'word': '▁became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987563,\n", " 'index': 218,\n", " 'word': '▁a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99791545,\n", " 'index': 219,\n", " 'word': '▁pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983084,\n", " 'index': 220,\n", " 'word': '▁icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985397,\n", " 'index': 221,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.986997,\n", " 'index': 222,\n", " 'word': '▁shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99500024,\n", " 'index': 223,\n", " 'word': '▁in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9940572,\n", " 'index': 224,\n", " 'word': '▁movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9948042,\n", " 'index': 225,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9830314,\n", " 'index': 226,\n", " 'word': '▁appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99576855,\n", " 'index': 227,\n", " 'word': '▁in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9962681,\n", " 'index': 228,\n", " 'word': '▁books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.997297,\n", " 'index': 229,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9969189,\n", " 'index': 230,\n", " 'word': '▁magazine',\n", " 'start': 939,\n", " 'end': 947},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9965844,\n", " 'index': 231,\n", " 'word': 's',\n", " 'start': 947,\n", " 'end': 948},\n", 
" {'entity': 'LABEL_0',\n", " 'score': 0.9970284,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9969007,\n", " 'index': 233,\n", " 'word': '▁radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99603075,\n", " 'index': 234,\n", " 'word': '▁talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99737513,\n", " 'index': 235,\n", " 'word': '▁shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9977088,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976942,\n", " 'index': 237,\n", " 'word': '▁and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9749291,\n", " 'index': 238,\n", " 'word': '▁ha',\n", " 'start': 972,\n", " 'end': 974},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9755974,\n", " 'index': 239,\n", " 'word': 'un',\n", " 'start': 974,\n", " 'end': 976},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97456145,\n", " 'index': 240,\n", " 'word': 'ted',\n", " 'start': 976,\n", " 'end': 979},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97627914,\n", " 'index': 241,\n", " 'word': '▁gro',\n", " 'start': 980,\n", " 'end': 983},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9766211,\n", " 'index': 242,\n", " 'word': 'cer',\n", " 'start': 983,\n", " 'end': 986},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9755186,\n", " 'index': 243,\n", " 'word': 'y',\n", " 'start': 986,\n", " 'end': 987},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97471875,\n", " 'index': 244,\n", " 'word': '▁store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97767866,\n", " 'index': 245,\n", " 'word': '▁check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97759926,\n", " 'index': 246,\n", " 'word': 'out',\n", " 'start': 
999,\n", " 'end': 1002},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98035,\n", " 'index': 247,\n", " 'word': '▁lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99741083,\n", " 'index': 248,\n", " 'word': '▁for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99673516,\n", " 'index': 249,\n", " 'word': '▁25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99663204,\n", " 'index': 250,\n", " 'word': '▁years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9973544,\n", " 'index': 251,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9906943,\n", " 'index': 252,\n", " 'word': '▁Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9870733,\n", " 'index': 253,\n", " 'word': '▁people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98770535,\n", " 'index': 254,\n", " 'word': '▁thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98852706,\n", " 'index': 255,\n", " 'word': '▁the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97648376,\n", " 'index': 256,\n", " 'word': '▁natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98154485,\n", " 'index': 257,\n", " 'word': '▁land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9710462,\n", " 'index': 258,\n", " 'word': 'form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9930674,\n", " 'index': 259,\n", " 'word': '▁was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9904585,\n", " 'index': 260,\n", " 'word': '▁evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'LABEL_0',\n", " 
'score': 0.99835026,\n", " 'index': 261,\n", " 'word': '▁of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96821344,\n", " 'index': 262,\n", " 'word': '▁life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988362,\n", " 'index': 263,\n", " 'word': '▁on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99900466,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994442,\n", " 'index': 265,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99967086,\n", " 'index': 266,\n", " 'word': '▁and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990483,\n", " 'index': 267,\n", " 'word': '▁that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99356836,\n", " 'index': 268,\n", " 'word': '▁us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9960284,\n", " 'index': 269,\n", " 'word': '▁scientist',\n", " 'start': 1106,\n", " 'end': 1115},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9963697,\n", " 'index': 270,\n", " 'word': 's',\n", " 'start': 1115,\n", " 'end': 1116},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983851,\n", " 'index': 271,\n", " 'word': '▁wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986203,\n", " 'index': 272,\n", " 'word': '▁to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983407,\n", " 'index': 273,\n", " 'word': '▁hi',\n", " 'start': 1127,\n", " 'end': 1129},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99833196,\n", " 'index': 274,\n", " 'word': 'de',\n", " 'start': 1129,\n", " 'end': 1131},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99933356,\n", " 'index': 275,\n", " 'word': '▁it',\n", " 'start': 
1132,\n", " 'end': 1134},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99929845,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99945015,\n", " 'index': 277,\n", " 'word': '▁but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999064,\n", " 'index': 278,\n", " 'word': '▁really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.998408,\n", " 'index': 279,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9786945,\n", " 'index': 280,\n", " 'word': '▁the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9525546,\n", " 'index': 281,\n", " 'word': '▁defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9473736,\n", " 'index': 282,\n", " 'word': 's',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9641373,\n", " 'index': 283,\n", " 'word': '▁of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9595204,\n", " 'index': 284,\n", " 'word': '▁the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9465081,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.956375,\n", " 'index': 286,\n", " 'word': '▁budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9938366,\n", " 'index': 287,\n", " 'word': '▁wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99663156,\n", " 'index': 288,\n", " 'word': '▁there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9960407,\n", " 'index': 289,\n", " 'word': '▁was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.9943784,\n", " 'index': 290,\n", " 'word': '▁an',\n", " 'start': 1196,\n", " 'end': 1198},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9929704,\n", " 'index': 291,\n", " 'word': 'cient',\n", " 'start': 1198,\n", " 'end': 1203},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9938042,\n", " 'index': 292,\n", " 'word': '▁civiliza',\n", " 'start': 1204,\n", " 'end': 1212},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99334246,\n", " 'index': 293,\n", " 'word': 'tion',\n", " 'start': 1212,\n", " 'end': 1216},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981047,\n", " 'index': 294,\n", " 'word': '▁on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99802125,\n", " 'index': 295,\n", " 'word': '▁Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9974251,\n", " 'index': 296,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99758637,\n", " 'index': 297,\n", " 'word': '▁We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99532855,\n", " 'index': 298,\n", " 'word': '▁decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9968822,\n", " 'index': 299,\n", " 'word': '▁to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99047685,\n", " 'index': 300,\n", " 'word': '▁take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98163795,\n", " 'index': 301,\n", " 'word': '▁another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.981338,\n", " 'index': 302,\n", " 'word': '▁shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976725,\n", " 'index': 303,\n", " 'word': '▁just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9989598,\n", " 'index': 304,\n", " 'word': '▁to',\n", " 
'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99915683,\n", " 'index': 305,\n", " 'word': '▁make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990343,\n", " 'index': 306,\n", " 'word': '▁sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994629,\n", " 'index': 307,\n", " 'word': '▁we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99965143,\n", " 'index': 308,\n", " 'word': '▁were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996381,\n", " 'index': 309,\n", " 'word': 'n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996587,\n", " 'index': 310,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99964845,\n", " 'index': 311,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994722,\n", " 'index': 312,\n", " 'word': '▁wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99955314,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99804926,\n", " 'index': 314,\n", " 'word': '▁on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9966838,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99728274,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99812526,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99731195,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.6198232,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6221296,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.95554626,\n", " 'index': 321,\n", " 'word': '▁and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.93194216,\n", " 'index': 322,\n", " 'word': '▁his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7339511,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7226213,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7723435,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.76182514,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.87562317,\n", " 'index': 327,\n", " 'word': '▁camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.89518005,\n", " 'index': 328,\n", " 'word': '▁team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9218035,\n", " 'index': 329,\n", " 'word': '▁took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9225395,\n", " 'index': 330,\n", " 'word': '▁a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.916193,\n", " 'index': 331,\n", " 'word': '▁picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9926691,\n", " 'index': 332,\n", " 'word': '▁that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9904732,\n", " 'index': 333,\n", " 'word': '▁was',\n", " 
'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98267406,\n", " 'index': 334,\n", " 'word': '▁ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9849161,\n", " 'index': 335,\n", " 'word': '▁times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9801893,\n", " 'index': 336,\n", " 'word': '▁sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98280776,\n", " 'index': 337,\n", " 'word': 'er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99816483,\n", " 'index': 338,\n", " 'word': '▁than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99864274,\n", " 'index': 339,\n", " 'word': '▁the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981275,\n", " 'index': 340,\n", " 'word': '▁original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9918675,\n", " 'index': 341,\n", " 'word': '▁Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99843293,\n", " 'index': 342,\n", " 'word': '▁photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99834,\n", " 'index': 343,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99184215,\n", " 'index': 344,\n", " 'word': '▁reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99084723,\n", " 'index': 345,\n", " 'word': 'ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979195,\n", " 'index': 346,\n", " 'word': '▁a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99878365,\n", " 'index': 347,\n", " 'word': '▁natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.9988181,\n", " 'index': 348,\n", " 'word': '▁land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9989188,\n", " 'index': 349,\n", " 'word': 'form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987502,\n", " 'index': 350,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99949133,\n", " 'index': 351,\n", " 'word': '▁which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99865425,\n", " 'index': 352,\n", " 'word': '▁meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9977582,\n", " 'index': 353,\n", " 'word': '▁no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99797624,\n", " 'index': 354,\n", " 'word': '▁alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.998558,\n", " 'index': 355,\n", " 'word': '▁monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985544,\n", " 'index': 356,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991504,\n", " 'index': 357,\n", " 'word': '▁\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99959415,\n", " 'index': 358,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99847144,\n", " 'index': 359,\n", " 'word': '▁that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9965953,\n", " 'index': 360,\n", " 'word': '▁picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9989673,\n", " 'index': 361,\n", " 'word': '▁wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999243,\n", " 'index': 362,\n", " 
'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99926895,\n", " 'index': 363,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985392,\n", " 'index': 364,\n", " 'word': '▁very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9978017,\n", " 'index': 365,\n", " 'word': '▁clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99971443,\n", " 'index': 366,\n", " 'word': '▁at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99972814,\n", " 'index': 367,\n", " 'word': '▁all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99969304,\n", " 'index': 368,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99946886,\n", " 'index': 369,\n", " 'word': '▁which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99923897,\n", " 'index': 370,\n", " 'word': '▁could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993623,\n", " 'index': 371,\n", " 'word': '▁mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99697757,\n", " 'index': 372,\n", " 'word': '▁alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9969267,\n", " 'index': 373,\n", " 'word': '▁mark',\n", " 'start': 1562,\n", " 'end': 1566},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99740356,\n", " 'index': 374,\n", " 'word': 'ings',\n", " 'start': 1566,\n", " 'end': 1570},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990921,\n", " 'index': 375,\n", " 'word': '▁were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988734,\n", " 'index': 376,\n", " 'word': '▁hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.9993931,\n", " 'index': 377,\n", " 'word': '▁by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993395,\n", " 'index': 378,\n", " 'word': '▁ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994405,\n", " 'index': 379,\n", " 'word': 'ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996549,\n", " 'index': 380,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997403,\n", " 'index': 381,\n", " 'word': '▁Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997552,\n", " 'index': 382,\n", " 'word': '▁no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997204,\n", " 'index': 383,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994294,\n", " 'index': 384,\n", " 'word': '▁yes',\n", " 'start': 1601,\n", " 'end': 1604},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9963684,\n", " 'index': 385,\n", " 'word': '▁that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9943778,\n", " 'index': 386,\n", " 'word': '▁rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99859875,\n", " 'index': 387,\n", " 'word': '▁started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99874413,\n", " 'index': 388,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991598,\n", " 'index': 389,\n", " 'word': '▁but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9980592,\n", " 'index': 390,\n", " 'word': '▁to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99275404,\n", " 'index': 391,\n", " 'word': 
'▁prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9936196,\n", " 'index': 392,\n", " 'word': '▁them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9883024,\n", " 'index': 393,\n", " 'word': '▁wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99685585,\n", " 'index': 394,\n", " 'word': '▁on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9905654,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9897703,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9914705,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.989052,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988476,\n", " 'index': 399,\n", " 'word': '▁we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976255,\n", " 'index': 400,\n", " 'word': '▁decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990896,\n", " 'index': 401,\n", " 'word': '▁to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982212,\n", " 'index': 402,\n", " 'word': '▁take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987219,\n", " 'index': 403,\n", " 'word': '▁another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987041,\n", " 'index': 404,\n", " 'word': '▁picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99892056,\n", " 'index': 405,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.9990138,\n", " 'index': 406,\n", " 'word': '▁making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987526,\n", " 'index': 407,\n", " 'word': '▁sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99886996,\n", " 'index': 408,\n", " 'word': '▁it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99928397,\n", " 'index': 409,\n", " 'word': '▁was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986174,\n", " 'index': 410,\n", " 'word': '▁a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9980627,\n", " 'index': 411,\n", " 'word': '▁cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979323,\n", " 'index': 412,\n", " 'word': 'less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99851483,\n", " 'index': 413,\n", " 'word': '▁summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99867994,\n", " 'index': 414,\n", " 'word': '▁day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986368,\n", " 'index': 415,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9535012,\n", " 'index': 416,\n", " 'word': '▁Malin',\n", " 'start': 1745,\n", " 'end': 1750},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.95024145,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9507696,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9469109,\n", " 'index': 419,\n", " 'word': '▁team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8001874,\n", " 'index': 420,\n", " 'word': 
'▁capture',\n", " 'start': 1758,\n", " 'end': 1765},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7871362,\n", " 'index': 421,\n", " 'word': 'd',\n", " 'start': 1765,\n", " 'end': 1766},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7254455,\n", " 'index': 422,\n", " 'word': '▁an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6367166,\n", " 'index': 423,\n", " 'word': '▁amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6151613,\n", " 'index': 424,\n", " 'word': '▁photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9940475,\n", " 'index': 425,\n", " 'word': '▁using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99607027,\n", " 'index': 426,\n", " 'word': '▁the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9946696,\n", " 'index': 427,\n", " 'word': '▁camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9957001,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99648213,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9934956,\n", " 'index': 430,\n", " 'word': '▁absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9946866,\n", " 'index': 431,\n", " 'word': '▁maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99592364,\n", " 'index': 432,\n", " 'word': '▁revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99613345,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986493,\n", " 'index': 434,\n", " 'word': '▁With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.98783743,\n", " 'index': 435,\n", " 'word': '▁this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98299176,\n", " 'index': 436,\n", " 'word': '▁camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9948515,\n", " 'index': 437,\n", " 'word': '▁you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99211496,\n", " 'index': 438,\n", " 'word': '▁can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98975813,\n", " 'index': 439,\n", " 'word': '▁discern',\n", " 'start': 1857,\n", " 'end': 1864},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99309033,\n", " 'index': 440,\n", " 'word': '▁things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99935037,\n", " 'index': 441,\n", " 'word': '▁in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99924976,\n", " 'index': 442,\n", " 'word': '▁a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993017,\n", " 'index': 443,\n", " 'word': '▁digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99953175,\n", " 'index': 444,\n", " 'word': '▁image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992694,\n", " 'index': 445,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9953614,\n", " 'index': 446,\n", " 'word': '▁3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99575055,\n", " 'index': 447,\n", " 'word': '▁times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9946045,\n", " 'index': 448,\n", " 'word': '▁bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99767905,\n", " 
'index': 449,\n", " 'word': '▁than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985954,\n", " 'index': 450,\n", " 'word': '▁the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99617577,\n", " 'index': 451,\n", " 'word': '▁pixel',\n", " 'start': 1916,\n", " 'end': 1921},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9971623,\n", " 'index': 452,\n", " 'word': '▁size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999595,\n", " 'index': 453,\n", " 'word': '▁which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99959654,\n", " 'index': 454,\n", " 'word': '▁means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997098,\n", " 'index': 455,\n", " 'word': '▁if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997428,\n", " 'index': 456,\n", " 'word': '▁there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99971575,\n", " 'index': 457,\n", " 'word': '▁were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997061,\n", " 'index': 458,\n", " 'word': '▁any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994355,\n", " 'index': 459,\n", " 'word': '▁sign',\n", " 'start': 1957,\n", " 'end': 1961},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995265,\n", " 'index': 460,\n", " 'word': 's',\n", " 'start': 1961,\n", " 'end': 1962},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99968946,\n", " 'index': 461,\n", " 'word': '▁of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96821344,\n", " 'index': 462,\n", " 'word': '▁life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99971575,\n", " 'index': 463,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 
1971},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99914134,\n", " 'index': 464,\n", " 'word': '▁you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99893373,\n", " 'index': 465,\n", " 'word': '▁could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99874496,\n", " 'index': 466,\n", " 'word': '▁easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99866235,\n", " 'index': 467,\n", " 'word': '▁see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99949706,\n", " 'index': 468,\n", " 'word': '▁what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990375,\n", " 'index': 469,\n", " 'word': '▁they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99939954,\n", " 'index': 470,\n", " 'word': '▁were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991054,\n", " 'index': 471,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985911,\n", " 'index': 472,\n", " 'word': '▁What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99561054,\n", " 'index': 473,\n", " 'word': '▁the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99237365,\n", " 'index': 474,\n", " 'word': '▁picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99209166,\n", " 'index': 475,\n", " 'word': '▁showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99855715,\n", " 'index': 476,\n", " 'word': '▁was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982992,\n", " 'index': 477,\n", " 'word': '▁the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.99486816,\n", " 'index': 478,\n", " 'word': '▁but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9939652,\n", " 'index': 479,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994578,\n", " 'index': 480,\n", " 'word': '▁or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99937266,\n", " 'index': 481,\n", " 'word': '▁mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994253,\n", " 'index': 482,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99890506,\n", " 'index': 483,\n", " 'word': '▁which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99758005,\n", " 'index': 484,\n", " 'word': '▁are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9942561,\n", " 'index': 485,\n", " 'word': '▁land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9909231,\n", " 'index': 486,\n", " 'word': 'form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9923442,\n", " 'index': 487,\n", " 'word': 's',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9924804,\n", " 'index': 488,\n", " 'word': '▁common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.998198,\n", " 'index': 489,\n", " 'word': '▁around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9988335,\n", " 'index': 490,\n", " 'word': '▁the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99893695,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991405,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 
2103,\n", " 'end': 2107},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99888045,\n", " 'index': 493,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"dejanseo/LinkBERT-XL\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"dejanseo/LinkBERT-XL\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "96937981-4d0d-404c-ae4c-aefe77910390", "metadata": {}, "source": [ "## 45 HiTZ/mbert-argmining-abstrct-en-es" ] }, { "cell_type": "code", "execution_count": 97, "id": "8e292687-4b7c-40ab-9add-32f7190694c6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"HiTZ/mbert-argmining-abstrct-en-es\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"HiTZ/mbert-argmining-abstrct-en-es\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "801bcc2a-74f5-4168-bd46-81080ded913e", "metadata": {}, "source": [ "## 46 HiTZ/mdeberta-expl-extraction-multi " ] }, { "cell_type": "code", "execution_count": 99, "id": "0b463753-3823-4b84-b214-530b4b2d82db", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\transformers\\convert_slow_tokenizer.py:560: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option 
which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n", " warnings.warn(\n", "Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at HiTZ/mdeberta-expl-extraction-multi and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'LABEL_1',\n", " 'score': 0.69964296,\n", " 'index': 1,\n", " 'word': '▁So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68438363,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76636976,\n", " 'index': 3,\n", " 'word': '▁if',\n", " 'start': 3,\n", " 'end': 6},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7428422,\n", " 'index': 4,\n", " 'word': '▁you',\n", " 'start': 6,\n", " 'end': 10},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72554,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73893636,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73846024,\n", " 'index': 7,\n", " 'word': '▁',\n", " 'start': 13,\n", " 'end': 14},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7346474,\n", " 'index': 8,\n", " 'word': 'a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71560675,\n", " 'index': 9,\n", " 'word': '▁NASA',\n", " 'start': 15,\n", " 'end': 20},\n", " {'entity': 'LABEL_1',\n", 
" 'score': 0.7557671,\n", " 'index': 10,\n", " 'word': '▁',\n", " 'start': 20,\n", " 'end': 21},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7091787,\n", " 'index': 11,\n", " 'word': 'scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68782264,\n", " 'index': 12,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73133516,\n", " 'index': 13,\n", " 'word': '▁you',\n", " 'start': 31,\n", " 'end': 35},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7256754,\n", " 'index': 14,\n", " 'word': '▁should',\n", " 'start': 35,\n", " 'end': 42},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7170361,\n", " 'index': 15,\n", " 'word': '▁be',\n", " 'start': 42,\n", " 'end': 45},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7384796,\n", " 'index': 16,\n", " 'word': '▁',\n", " 'start': 45,\n", " 'end': 46},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7560725,\n", " 'index': 17,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7414307,\n", " 'index': 18,\n", " 'word': '▁to',\n", " 'start': 50,\n", " 'end': 53},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7181038,\n", " 'index': 19,\n", " 'word': '▁tell',\n", " 'start': 53,\n", " 'end': 58},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7363644,\n", " 'index': 20,\n", " 'word': '▁me',\n", " 'start': 58,\n", " 'end': 61},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7599349,\n", " 'index': 21,\n", " 'word': '▁the',\n", " 'start': 61,\n", " 'end': 65},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74890417,\n", " 'index': 22,\n", " 'word': '▁whole',\n", " 'start': 65,\n", " 'end': 71},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7119105,\n", " 'index': 23,\n", " 'word': '▁story',\n", " 'start': 71,\n", " 'end': 77},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7301424,\n", " 'index': 24,\n", " 'word': '▁about',\n", " 'start': 77,\n", " 'end': 83},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.7458922,\n", " 'index': 25,\n", " 'word': '▁the',\n", " 'start': 83,\n", " 'end': 87},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7478779,\n", " 'index': 26,\n", " 'word': '▁Face',\n", " 'start': 87,\n", " 'end': 92},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72260505,\n", " 'index': 27,\n", " 'word': '▁On',\n", " 'start': 92,\n", " 'end': 95},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.678619,\n", " 'index': 28,\n", " 'word': '▁Mars',\n", " 'start': 95,\n", " 'end': 100},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7015995,\n", " 'index': 29,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76157975,\n", " 'index': 30,\n", " 'word': '▁',\n", " 'start': 101,\n", " 'end': 102},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7142693,\n", " 'index': 31,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7759696,\n", " 'index': 32,\n", " 'word': '▁ob',\n", " 'start': 107,\n", " 'end': 110},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76725924,\n", " 'index': 33,\n", " 'word': 'viously',\n", " 'start': 110,\n", " 'end': 117},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7179181,\n", " 'index': 34,\n", " 'word': '▁is',\n", " 'start': 117,\n", " 'end': 120},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72448343,\n", " 'index': 35,\n", " 'word': '▁',\n", " 'start': 120,\n", " 'end': 121},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6992092,\n", " 'index': 36,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71810836,\n", " 'index': 37,\n", " 'word': '▁that',\n", " 'start': 129,\n", " 'end': 134},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7591603,\n", " 'index': 38,\n", " 'word': '▁there',\n", " 'start': 134,\n", " 'end': 140},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7224874,\n", " 'index': 39,\n", " 'word': '▁is',\n", " 'start': 140,\n", " 'end': 143},\n", " {'entity': 'LABEL_1',\n", " 
'score': 0.7104872,\n", " 'index': 40,\n", " 'word': '▁life',\n", " 'start': 143,\n", " 'end': 148},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7061166,\n", " 'index': 41,\n", " 'word': '▁on',\n", " 'start': 148,\n", " 'end': 151},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6738205,\n", " 'index': 42,\n", " 'word': '▁Mars',\n", " 'start': 151,\n", " 'end': 156},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66683656,\n", " 'index': 43,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6961923,\n", " 'index': 44,\n", " 'word': '▁and',\n", " 'start': 157,\n", " 'end': 161},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7319619,\n", " 'index': 45,\n", " 'word': '▁that',\n", " 'start': 161,\n", " 'end': 166},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7237144,\n", " 'index': 46,\n", " 'word': '▁the',\n", " 'start': 166,\n", " 'end': 170},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6963166,\n", " 'index': 47,\n", " 'word': '▁face',\n", " 'start': 170,\n", " 'end': 175},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74168783,\n", " 'index': 48,\n", " 'word': '▁was',\n", " 'start': 175,\n", " 'end': 179},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71670306,\n", " 'index': 49,\n", " 'word': '▁',\n", " 'start': 179,\n", " 'end': 180},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7101172,\n", " 'index': 50,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7373488,\n", " 'index': 51,\n", " 'word': '▁by',\n", " 'start': 187,\n", " 'end': 190},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7071672,\n", " 'index': 52,\n", " 'word': '▁alien',\n", " 'start': 190,\n", " 'end': 196},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70015,\n", " 'index': 53,\n", " 'word': 's',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6949053,\n", " 'index': 54,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.7373082,\n", " 'index': 55,\n", " 'word': '▁correct',\n", " 'start': 198,\n", " 'end': 206},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7566203,\n", " 'index': 56,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.79566896,\n", " 'index': 57,\n", " 'word': '▁No',\n", " 'start': 208,\n", " 'end': 211},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7330964,\n", " 'index': 58,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75868404,\n", " 'index': 59,\n", " 'word': '▁',\n", " 'start': 212,\n", " 'end': 213},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70785344,\n", " 'index': 60,\n", " 'word': 'twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6963004,\n", " 'index': 61,\n", " 'word': '▁five',\n", " 'start': 219,\n", " 'end': 224},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7040995,\n", " 'index': 62,\n", " 'word': '▁years',\n", " 'start': 224,\n", " 'end': 230},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7281579,\n", " 'index': 63,\n", " 'word': '▁ago',\n", " 'start': 230,\n", " 'end': 234},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7362324,\n", " 'index': 64,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78284067,\n", " 'index': 65,\n", " 'word': '▁our',\n", " 'start': 235,\n", " 'end': 239},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7404312,\n", " 'index': 66,\n", " 'word': '▁Viking',\n", " 'start': 239,\n", " 'end': 246},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6920022,\n", " 'index': 67,\n", " 'word': '▁1',\n", " 'start': 246,\n", " 'end': 248},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.724633,\n", " 'index': 68,\n", " 'word': '▁space',\n", " 'start': 248,\n", " 'end': 254},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6913412,\n", " 'index': 69,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 
259},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7467937,\n", " 'index': 70,\n", " 'word': '▁was',\n", " 'start': 259,\n", " 'end': 263},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73295933,\n", " 'index': 71,\n", " 'word': '▁circ',\n", " 'start': 263,\n", " 'end': 268},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7470121,\n", " 'index': 72,\n", " 'word': 'ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7531431,\n", " 'index': 73,\n", " 'word': '▁the',\n", " 'start': 272,\n", " 'end': 276},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7101165,\n", " 'index': 74,\n", " 'word': '▁planet',\n", " 'start': 276,\n", " 'end': 283},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7100098,\n", " 'index': 75,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7461217,\n", " 'index': 76,\n", " 'word': '▁snap',\n", " 'start': 284,\n", " 'end': 289},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.739955,\n", " 'index': 77,\n", " 'word': 'ping',\n", " 'start': 289,\n", " 'end': 293},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7169406,\n", " 'index': 78,\n", " 'word': '▁photos',\n", " 'start': 293,\n", " 'end': 300},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7049295,\n", " 'index': 79,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7407019,\n", " 'index': 80,\n", " 'word': '▁',\n", " 'start': 301,\n", " 'end': 302},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7440167,\n", " 'index': 81,\n", " 'word': 'when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7350766,\n", " 'index': 82,\n", " 'word': '▁it',\n", " 'start': 306,\n", " 'end': 309},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7492218,\n", " 'index': 83,\n", " 'word': '▁spot',\n", " 'start': 309,\n", " 'end': 314},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7348358,\n", " 'index': 84,\n", " 'word': 'ted',\n", " 'start': 314,\n", 
" 'end': 317},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76565677,\n", " 'index': 85,\n", " 'word': '▁the',\n", " 'start': 317,\n", " 'end': 321},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73117274,\n", " 'index': 86,\n", " 'word': '▁',\n", " 'start': 321,\n", " 'end': 322},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7081199,\n", " 'index': 87,\n", " 'word': 'shadow',\n", " 'start': 322,\n", " 'end': 328},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7255247,\n", " 'index': 88,\n", " 'word': 'y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73350924,\n", " 'index': 89,\n", " 'word': '▁like',\n", " 'start': 329,\n", " 'end': 334},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72074175,\n", " 'index': 90,\n", " 'word': 'ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7409807,\n", " 'index': 91,\n", " 'word': '▁of',\n", " 'start': 338,\n", " 'end': 341},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7388687,\n", " 'index': 92,\n", " 'word': '▁',\n", " 'start': 341,\n", " 'end': 342},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73592776,\n", " 'index': 93,\n", " 'word': 'a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7441289,\n", " 'index': 94,\n", " 'word': '▁human',\n", " 'start': 343,\n", " 'end': 349},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73894185,\n", " 'index': 95,\n", " 'word': '▁face',\n", " 'start': 349,\n", " 'end': 354},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76816374,\n", " 'index': 96,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8132516,\n", " 'index': 97,\n", " 'word': '▁Us',\n", " 'start': 355,\n", " 'end': 358},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7830225,\n", " 'index': 98,\n", " 'word': '▁',\n", " 'start': 358,\n", " 'end': 359},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7423716,\n", " 'index': 99,\n", " 'word': 'scientist',\n", " 'start': 
359,\n", " 'end': 368},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72860986,\n", " 'index': 100,\n", " 'word': 's',\n", " 'start': 368,\n", " 'end': 369},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7603417,\n", " 'index': 101,\n", " 'word': '▁figure',\n", " 'start': 369,\n", " 'end': 376},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7233966,\n", " 'index': 102,\n", " 'word': 'd',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73558444,\n", " 'index': 103,\n", " 'word': '▁out',\n", " 'start': 377,\n", " 'end': 381},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74666375,\n", " 'index': 104,\n", " 'word': '▁that',\n", " 'start': 381,\n", " 'end': 386},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72817564,\n", " 'index': 105,\n", " 'word': '▁it',\n", " 'start': 386,\n", " 'end': 389},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7462587,\n", " 'index': 106,\n", " 'word': '▁was',\n", " 'start': 389,\n", " 'end': 393},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77071136,\n", " 'index': 107,\n", " 'word': '▁just',\n", " 'start': 393,\n", " 'end': 398},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7622161,\n", " 'index': 108,\n", " 'word': '▁',\n", " 'start': 398,\n", " 'end': 399},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7517658,\n", " 'index': 109,\n", " 'word': 'another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7740035,\n", " 'index': 110,\n", " 'word': '▁Marti',\n", " 'start': 406,\n", " 'end': 412},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7184144,\n", " 'index': 111,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.697455,\n", " 'index': 112,\n", " 'word': '▁mesa',\n", " 'start': 414,\n", " 'end': 419},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7242985,\n", " 'index': 113,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.729316,\n", " 'index': 114,\n", " 
'word': '▁common',\n", " 'start': 420,\n", " 'end': 427},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7581685,\n", " 'index': 115,\n", " 'word': '▁around',\n", " 'start': 427,\n", " 'end': 434},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75228137,\n", " 'index': 116,\n", " 'word': '▁Cy',\n", " 'start': 434,\n", " 'end': 437},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73617876,\n", " 'index': 117,\n", " 'word': 'doni',\n", " 'start': 437,\n", " 'end': 441},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.694593,\n", " 'index': 118,\n", " 'word': 'a',\n", " 'start': 441,\n", " 'end': 442},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71767867,\n", " 'index': 119,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7295897,\n", " 'index': 120,\n", " 'word': '▁only',\n", " 'start': 443,\n", " 'end': 448},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73233503,\n", " 'index': 121,\n", " 'word': '▁this',\n", " 'start': 448,\n", " 'end': 453},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7232187,\n", " 'index': 122,\n", " 'word': '▁one',\n", " 'start': 453,\n", " 'end': 457},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74401796,\n", " 'index': 123,\n", " 'word': '▁had',\n", " 'start': 457,\n", " 'end': 461},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7337583,\n", " 'index': 124,\n", " 'word': '▁',\n", " 'start': 461,\n", " 'end': 462},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7196742,\n", " 'index': 125,\n", " 'word': 'shadow',\n", " 'start': 462,\n", " 'end': 468},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7004562,\n", " 'index': 126,\n", " 'word': 's',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7275282,\n", " 'index': 127,\n", " 'word': '▁that',\n", " 'start': 469,\n", " 'end': 474},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.729493,\n", " 'index': 128,\n", " 'word': '▁made',\n", " 'start': 474,\n", " 'end': 479},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.7232314,\n", " 'index': 129,\n", " 'word': '▁it',\n", " 'start': 479,\n", " 'end': 482},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72791636,\n", " 'index': 130,\n", " 'word': '▁look',\n", " 'start': 482,\n", " 'end': 487},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73750484,\n", " 'index': 131,\n", " 'word': '▁like',\n", " 'start': 487,\n", " 'end': 492},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7305402,\n", " 'index': 132,\n", " 'word': '▁an',\n", " 'start': 492,\n", " 'end': 495},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7322336,\n", " 'index': 133,\n", " 'word': '▁Egypt',\n", " 'start': 495,\n", " 'end': 501},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7042777,\n", " 'index': 134,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70994717,\n", " 'index': 135,\n", " 'word': '▁Phar',\n", " 'start': 504,\n", " 'end': 509},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.702662,\n", " 'index': 136,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70684516,\n", " 'index': 137,\n", " 'word': 'o',\n", " 'start': 510,\n", " 'end': 511},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7180575,\n", " 'index': 138,\n", " 'word': 'h',\n", " 'start': 511,\n", " 'end': 512},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7585362,\n", " 'index': 139,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78797036,\n", " 'index': 140,\n", " 'word': '▁',\n", " 'start': 513,\n", " 'end': 514},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77854466,\n", " 'index': 141,\n", " 'word': 'Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76023924,\n", " 'index': 142,\n", " 'word': '▁',\n", " 'start': 518,\n", " 'end': 519},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76562846,\n", " 'index': 143,\n", " 'word': 'few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.7606485,\n", " 'index': 144,\n", " 'word': '▁days',\n", " 'start': 522,\n", " 'end': 527},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7725775,\n", " 'index': 145,\n", " 'word': '▁later',\n", " 'start': 527,\n", " 'end': 533},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7549871,\n", " 'index': 146,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7813583,\n", " 'index': 147,\n", " 'word': '▁we',\n", " 'start': 534,\n", " 'end': 537},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73188114,\n", " 'index': 148,\n", " 'word': '▁',\n", " 'start': 537,\n", " 'end': 538},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72285676,\n", " 'index': 149,\n", " 'word': 'reveal',\n", " 'start': 538,\n", " 'end': 544},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73699045,\n", " 'index': 150,\n", " 'word': 'ed',\n", " 'start': 544,\n", " 'end': 546},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.732305,\n", " 'index': 151,\n", " 'word': '▁the',\n", " 'start': 546,\n", " 'end': 550},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71078587,\n", " 'index': 152,\n", " 'word': '▁image',\n", " 'start': 550,\n", " 'end': 556},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7434471,\n", " 'index': 153,\n", " 'word': '▁for',\n", " 'start': 556,\n", " 'end': 560},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7312815,\n", " 'index': 154,\n", " 'word': '▁all',\n", " 'start': 560,\n", " 'end': 564},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74657434,\n", " 'index': 155,\n", " 'word': '▁to',\n", " 'start': 564,\n", " 'end': 567},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7268362,\n", " 'index': 156,\n", " 'word': '▁see',\n", " 'start': 567,\n", " 'end': 571},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7400499,\n", " 'index': 157,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7625019,\n", " 'index': 158,\n", " 'word': '▁and',\n", " 'start': 572,\n", " 'end': 
576},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7985083,\n", " 'index': 159,\n", " 'word': '▁we',\n", " 'start': 576,\n", " 'end': 579},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77577597,\n", " 'index': 160,\n", " 'word': '▁made',\n", " 'start': 579,\n", " 'end': 584},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78290325,\n", " 'index': 161,\n", " 'word': '▁sure',\n", " 'start': 584,\n", " 'end': 589},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76356405,\n", " 'index': 162,\n", " 'word': '▁to',\n", " 'start': 589,\n", " 'end': 592},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73777944,\n", " 'index': 163,\n", " 'word': '▁note',\n", " 'start': 592,\n", " 'end': 597},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76957136,\n", " 'index': 164,\n", " 'word': '▁that',\n", " 'start': 597,\n", " 'end': 602},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76147383,\n", " 'index': 165,\n", " 'word': '▁it',\n", " 'start': 602,\n", " 'end': 605},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78018135,\n", " 'index': 166,\n", " 'word': '▁was',\n", " 'start': 605,\n", " 'end': 609},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76262224,\n", " 'index': 167,\n", " 'word': '▁',\n", " 'start': 609,\n", " 'end': 610},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7494763,\n", " 'index': 168,\n", " 'word': 'a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76614785,\n", " 'index': 169,\n", " 'word': '▁',\n", " 'start': 611,\n", " 'end': 612},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75811285,\n", " 'index': 170,\n", " 'word': 'huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74348,\n", " 'index': 171,\n", " 'word': '▁rock',\n", " 'start': 616,\n", " 'end': 621},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73305815,\n", " 'index': 172,\n", " 'word': '▁formation',\n", " 'start': 621,\n", " 'end': 631},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74695593,\n", " 'index': 173,\n", " 'word': 
'▁that',\n", " 'start': 631,\n", " 'end': 636},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7620345,\n", " 'index': 174,\n", " 'word': '▁just',\n", " 'start': 636,\n", " 'end': 641},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7426398,\n", " 'index': 175,\n", " 'word': '▁rese',\n", " 'start': 641,\n", " 'end': 646},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7361759,\n", " 'index': 176,\n", " 'word': 'mbled',\n", " 'start': 646,\n", " 'end': 651},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72064775,\n", " 'index': 177,\n", " 'word': '▁',\n", " 'start': 651,\n", " 'end': 652},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7398345,\n", " 'index': 178,\n", " 'word': 'a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7524488,\n", " 'index': 179,\n", " 'word': '▁human',\n", " 'start': 653,\n", " 'end': 659},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7174149,\n", " 'index': 180,\n", " 'word': '▁head',\n", " 'start': 659,\n", " 'end': 664},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7290167,\n", " 'index': 181,\n", " 'word': '▁and',\n", " 'start': 664,\n", " 'end': 668},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7181655,\n", " 'index': 182,\n", " 'word': '▁face',\n", " 'start': 668,\n", " 'end': 673},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73066306,\n", " 'index': 183,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7215076,\n", " 'index': 184,\n", " 'word': '▁but',\n", " 'start': 674,\n", " 'end': 678},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7252736,\n", " 'index': 185,\n", " 'word': '▁all',\n", " 'start': 678,\n", " 'end': 682},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72834283,\n", " 'index': 186,\n", " 'word': '▁of',\n", " 'start': 682,\n", " 'end': 685},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.753237,\n", " 'index': 187,\n", " 'word': '▁it',\n", " 'start': 685,\n", " 'end': 688},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76431835,\n", " 
'index': 188,\n", " 'word': '▁was',\n", " 'start': 688,\n", " 'end': 692},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7625411,\n", " 'index': 189,\n", " 'word': '▁',\n", " 'start': 692,\n", " 'end': 693},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76240027,\n", " 'index': 190,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75998336,\n", " 'index': 191,\n", " 'word': '▁by',\n", " 'start': 699,\n", " 'end': 702},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7409608,\n", " 'index': 192,\n", " 'word': '▁',\n", " 'start': 702,\n", " 'end': 703},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72436345,\n", " 'index': 193,\n", " 'word': 'shadow',\n", " 'start': 703,\n", " 'end': 709},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7190035,\n", " 'index': 194,\n", " 'word': 's',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78103834,\n", " 'index': 195,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7606551,\n", " 'index': 196,\n", " 'word': '▁We',\n", " 'start': 711,\n", " 'end': 714},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7695547,\n", " 'index': 197,\n", " 'word': '▁only',\n", " 'start': 714,\n", " 'end': 719},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75947803,\n", " 'index': 198,\n", " 'word': '▁',\n", " 'start': 719,\n", " 'end': 720},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.713467,\n", " 'index': 199,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6898313,\n", " 'index': 200,\n", " 'word': '▁it',\n", " 'start': 729,\n", " 'end': 732},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72763705,\n", " 'index': 201,\n", " 'word': '▁',\n", " 'start': 732,\n", " 'end': 733},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74799156,\n", " 'index': 202,\n", " 'word': 'because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'LABEL_1',\n", " 
'score': 0.7443309,\n", " 'index': 203,\n", " 'word': '▁we',\n", " 'start': 740,\n", " 'end': 743},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7246991,\n", " 'index': 204,\n", " 'word': '▁thought',\n", " 'start': 743,\n", " 'end': 751},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6987896,\n", " 'index': 205,\n", " 'word': '▁it',\n", " 'start': 751,\n", " 'end': 754},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70192343,\n", " 'index': 206,\n", " 'word': '▁',\n", " 'start': 754,\n", " 'end': 755},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6921032,\n", " 'index': 207,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7164251,\n", " 'index': 208,\n", " 'word': '▁be',\n", " 'start': 760,\n", " 'end': 763},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6848709,\n", " 'index': 209,\n", " 'word': '▁',\n", " 'start': 763,\n", " 'end': 764},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7095274,\n", " 'index': 210,\n", " 'word': 'a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7103807,\n", " 'index': 211,\n", " 'word': '▁good',\n", " 'start': 765,\n", " 'end': 770},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7356097,\n", " 'index': 212,\n", " 'word': '▁way',\n", " 'start': 770,\n", " 'end': 774},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75338656,\n", " 'index': 213,\n", " 'word': '▁to',\n", " 'start': 774,\n", " 'end': 777},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74160135,\n", " 'index': 214,\n", " 'word': '▁engage',\n", " 'start': 777,\n", " 'end': 784},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78099227,\n", " 'index': 215,\n", " 'word': '▁the',\n", " 'start': 784,\n", " 'end': 788},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74460626,\n", " 'index': 216,\n", " 'word': '▁public',\n", " 'start': 788,\n", " 'end': 795},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7796549,\n", " 'index': 217,\n", " 'word': '▁with',\n", " 'start': 795,\n", " 'end': 800},\n", 
" {'entity': 'LABEL_1',\n", " 'score': 0.735151,\n", " 'index': 218,\n", " 'word': '▁NASA',\n", " 'start': 800,\n", " 'end': 805},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73245096,\n", " 'index': 219,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7472554,\n", " 'index': 220,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7569386,\n", " 'index': 221,\n", " 'word': '▁',\n", " 'start': 807,\n", " 'end': 808},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73314124,\n", " 'index': 222,\n", " 'word': 'finding',\n", " 'start': 808,\n", " 'end': 815},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7164539,\n", " 'index': 223,\n", " 'word': 's',\n", " 'start': 815,\n", " 'end': 816},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72332114,\n", " 'index': 224,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70425224,\n", " 'index': 225,\n", " 'word': '▁and',\n", " 'start': 817,\n", " 'end': 821},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73506117,\n", " 'index': 226,\n", " 'word': '▁at',\n", " 'start': 821,\n", " 'end': 824},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64045215,\n", " 'index': 227,\n", " 'word': 'rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6795728,\n", " 'index': 228,\n", " 'word': 'c',\n", " 'start': 827,\n", " 'end': 828},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6943617,\n", " 'index': 229,\n", " 'word': 't',\n", " 'start': 828,\n", " 'end': 829},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.712106,\n", " 'index': 230,\n", " 'word': '▁attention',\n", " 'start': 829,\n", " 'end': 839},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72840285,\n", " 'index': 231,\n", " 'word': '▁to',\n", " 'start': 839,\n", " 'end': 842},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6708754,\n", " 'index': 232,\n", " 'word': '▁Mars',\n", " 'start': 842,\n", 
" 'end': 847},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66703403,\n", " 'index': 233,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6601703,\n", " 'index': 234,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71845216,\n", " 'index': 235,\n", " 'word': '▁and',\n", " 'start': 849,\n", " 'end': 853},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6982402,\n", " 'index': 236,\n", " 'word': '▁it',\n", " 'start': 853,\n", " 'end': 856},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78411067,\n", " 'index': 237,\n", " 'word': '▁did',\n", " 'start': 856,\n", " 'end': 860},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7109891,\n", " 'index': 238,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77790356,\n", " 'index': 239,\n", " 'word': '▁The',\n", " 'start': 862,\n", " 'end': 866},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7041047,\n", " 'index': 240,\n", " 'word': '▁face',\n", " 'start': 866,\n", " 'end': 871},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70027363,\n", " 'index': 241,\n", " 'word': '▁on',\n", " 'start': 871,\n", " 'end': 874},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.67925966,\n", " 'index': 242,\n", " 'word': '▁Mars',\n", " 'start': 874,\n", " 'end': 879},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7132371,\n", " 'index': 243,\n", " 'word': '▁soon',\n", " 'start': 879,\n", " 'end': 884},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70119303,\n", " 'index': 244,\n", " 'word': '▁b',\n", " 'start': 884,\n", " 'end': 886},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7086833,\n", " 'index': 245,\n", " 'word': 'ecame',\n", " 'start': 886,\n", " 'end': 891},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71190226,\n", " 'index': 246,\n", " 'word': '▁',\n", " 'start': 891,\n", " 'end': 892},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.721647,\n", " 'index': 247,\n", " 'word': 'a',\n", " 
'start': 892,\n", " 'end': 893},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71419626,\n", " 'index': 248,\n", " 'word': '▁pop',\n", " 'start': 893,\n", " 'end': 897},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7087096,\n", " 'index': 249,\n", " 'word': '▁',\n", " 'start': 897,\n", " 'end': 898},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6964584,\n", " 'index': 250,\n", " 'word': 'icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70615745,\n", " 'index': 251,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7585035,\n", " 'index': 252,\n", " 'word': '▁shot',\n", " 'start': 903,\n", " 'end': 908},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76861167,\n", " 'index': 253,\n", " 'word': '▁in',\n", " 'start': 908,\n", " 'end': 911},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7368769,\n", " 'index': 254,\n", " 'word': '▁movies',\n", " 'start': 911,\n", " 'end': 918},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71782357,\n", " 'index': 255,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74544686,\n", " 'index': 256,\n", " 'word': '▁',\n", " 'start': 919,\n", " 'end': 920},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74319994,\n", " 'index': 257,\n", " 'word': 'appear',\n", " 'start': 920,\n", " 'end': 926},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7328243,\n", " 'index': 258,\n", " 'word': 'ed',\n", " 'start': 926,\n", " 'end': 928},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75824165,\n", " 'index': 259,\n", " 'word': '▁in',\n", " 'start': 928,\n", " 'end': 931},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73930544,\n", " 'index': 260,\n", " 'word': '▁books',\n", " 'start': 931,\n", " 'end': 937},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7212368,\n", " 'index': 261,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7187186,\n", " 'index': 262,\n", " 
'word': '▁magazine',\n", " 'start': 938,\n", " 'end': 947},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7175941,\n", " 'index': 263,\n", " 'word': 's',\n", " 'start': 947,\n", " 'end': 948},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7070163,\n", " 'index': 264,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7512239,\n", " 'index': 265,\n", " 'word': '▁radio',\n", " 'start': 949,\n", " 'end': 955},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7268631,\n", " 'index': 266,\n", " 'word': '▁talk',\n", " 'start': 955,\n", " 'end': 960},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6982043,\n", " 'index': 267,\n", " 'word': '▁',\n", " 'start': 960,\n", " 'end': 961},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7089483,\n", " 'index': 268,\n", " 'word': 'shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6838974,\n", " 'index': 269,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69852626,\n", " 'index': 270,\n", " 'word': '▁and',\n", " 'start': 967,\n", " 'end': 971},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72184855,\n", " 'index': 271,\n", " 'word': '▁ha',\n", " 'start': 971,\n", " 'end': 974},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70426184,\n", " 'index': 272,\n", " 'word': 'unted',\n", " 'start': 974,\n", " 'end': 979},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7255174,\n", " 'index': 273,\n", " 'word': '▁gro',\n", " 'start': 979,\n", " 'end': 983},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73427945,\n", " 'index': 274,\n", " 'word': 'cery',\n", " 'start': 983,\n", " 'end': 987},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7239857,\n", " 'index': 275,\n", " 'word': '▁store',\n", " 'start': 987,\n", " 'end': 993},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70565,\n", " 'index': 276,\n", " 'word': '▁',\n", " 'start': 993,\n", " 'end': 994},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.6898292,\n", " 'index': 277,\n", " 'word': 'checkout',\n", " 'start': 994,\n", " 'end': 1002},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69602674,\n", " 'index': 278,\n", " 'word': '▁',\n", " 'start': 1002,\n", " 'end': 1003},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6891,\n", " 'index': 279,\n", " 'word': 'lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73144776,\n", " 'index': 280,\n", " 'word': '▁for',\n", " 'start': 1008,\n", " 'end': 1012},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6338651,\n", " 'index': 281,\n", " 'word': '▁25',\n", " 'start': 1012,\n", " 'end': 1015},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7095752,\n", " 'index': 282,\n", " 'word': '▁years',\n", " 'start': 1015,\n", " 'end': 1021},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73906696,\n", " 'index': 283,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8037943,\n", " 'index': 284,\n", " 'word': '▁Some',\n", " 'start': 1022,\n", " 'end': 1027},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77938217,\n", " 'index': 285,\n", " 'word': '▁people',\n", " 'start': 1027,\n", " 'end': 1034},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.762757,\n", " 'index': 286,\n", " 'word': '▁thought',\n", " 'start': 1034,\n", " 'end': 1042},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8229662,\n", " 'index': 287,\n", " 'word': '▁the',\n", " 'start': 1042,\n", " 'end': 1046},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7882584,\n", " 'index': 288,\n", " 'word': '▁natural',\n", " 'start': 1046,\n", " 'end': 1054},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7520508,\n", " 'index': 289,\n", " 'word': '▁land',\n", " 'start': 1054,\n", " 'end': 1059},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73779494,\n", " 'index': 290,\n", " 'word': 'form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7814589,\n", " 'index': 291,\n", " 'word': '▁was',\n", " 'start': 
1063,\n", " 'end': 1067},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74082786,\n", " 'index': 292,\n", " 'word': '▁',\n", " 'start': 1067,\n", " 'end': 1068},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7207082,\n", " 'index': 293,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7479461,\n", " 'index': 294,\n", " 'word': '▁of',\n", " 'start': 1076,\n", " 'end': 1079},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.755139,\n", " 'index': 295,\n", " 'word': '▁life',\n", " 'start': 1079,\n", " 'end': 1084},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7355991,\n", " 'index': 296,\n", " 'word': '▁on',\n", " 'start': 1084,\n", " 'end': 1087},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7187748,\n", " 'index': 297,\n", " 'word': '▁Mars',\n", " 'start': 1087,\n", " 'end': 1092},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7418989,\n", " 'index': 298,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73773444,\n", " 'index': 299,\n", " 'word': '▁and',\n", " 'start': 1093,\n", " 'end': 1097},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7697259,\n", " 'index': 300,\n", " 'word': '▁that',\n", " 'start': 1097,\n", " 'end': 1102},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75266504,\n", " 'index': 301,\n", " 'word': '▁',\n", " 'start': 1102,\n", " 'end': 1103},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7332634,\n", " 'index': 302,\n", " 'word': 'us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7624355,\n", " 'index': 303,\n", " 'word': '▁',\n", " 'start': 1105,\n", " 'end': 1106},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7445762,\n", " 'index': 304,\n", " 'word': 'scientist',\n", " 'start': 1106,\n", " 'end': 1115},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72726446,\n", " 'index': 305,\n", " 'word': 's',\n", " 'start': 1115,\n", " 'end': 1116},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74028957,\n", 
" 'index': 306,\n", " 'word': '▁',\n", " 'start': 1116,\n", " 'end': 1117},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7528527,\n", " 'index': 307,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7751754,\n", " 'index': 308,\n", " 'word': '▁to',\n", " 'start': 1123,\n", " 'end': 1126},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7538359,\n", " 'index': 309,\n", " 'word': '▁',\n", " 'start': 1126,\n", " 'end': 1127},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72743875,\n", " 'index': 310,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74001145,\n", " 'index': 311,\n", " 'word': '▁it',\n", " 'start': 1131,\n", " 'end': 1134},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7524098,\n", " 'index': 312,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.80662686,\n", " 'index': 313,\n", " 'word': '▁but',\n", " 'start': 1135,\n", " 'end': 1139},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78448457,\n", " 'index': 314,\n", " 'word': '▁',\n", " 'start': 1139,\n", " 'end': 1140},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70067596,\n", " 'index': 315,\n", " 'word': 'really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73782057,\n", " 'index': 316,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78326744,\n", " 'index': 317,\n", " 'word': '▁the',\n", " 'start': 1147,\n", " 'end': 1151},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7770973,\n", " 'index': 318,\n", " 'word': '▁',\n", " 'start': 1151,\n", " 'end': 1152},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72133195,\n", " 'index': 319,\n", " 'word': 'defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73959935,\n", " 'index': 320,\n", " 'word': 's',\n", " 'start': 1160,\n", " 'end': 1161},\n", " 
{'entity': 'LABEL_1',\n", " 'score': 0.76056314,\n", " 'index': 321,\n", " 'word': '▁of',\n", " 'start': 1161,\n", " 'end': 1164},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7938618,\n", " 'index': 322,\n", " 'word': '▁the',\n", " 'start': 1164,\n", " 'end': 1168},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.752001,\n", " 'index': 323,\n", " 'word': '▁NASA',\n", " 'start': 1168,\n", " 'end': 1173},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72408575,\n", " 'index': 324,\n", " 'word': '▁budget',\n", " 'start': 1173,\n", " 'end': 1180},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70957243,\n", " 'index': 325,\n", " 'word': '▁wish',\n", " 'start': 1180,\n", " 'end': 1185},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7567643,\n", " 'index': 326,\n", " 'word': '▁there',\n", " 'start': 1185,\n", " 'end': 1191},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75911105,\n", " 'index': 327,\n", " 'word': '▁was',\n", " 'start': 1191,\n", " 'end': 1195},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76012385,\n", " 'index': 328,\n", " 'word': '▁',\n", " 'start': 1195,\n", " 'end': 1196},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.732424,\n", " 'index': 329,\n", " 'word': 'ancient',\n", " 'start': 1196,\n", " 'end': 1203},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73175794,\n", " 'index': 330,\n", " 'word': '▁c',\n", " 'start': 1203,\n", " 'end': 1205},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7199019,\n", " 'index': 331,\n", " 'word': 'ivilization',\n", " 'start': 1205,\n", " 'end': 1216},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7307094,\n", " 'index': 332,\n", " 'word': '▁on',\n", " 'start': 1216,\n", " 'end': 1219},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7247836,\n", " 'index': 333,\n", " 'word': '▁Mars',\n", " 'start': 1219,\n", " 'end': 1224},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7309902,\n", " 'index': 334,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7614061,\n", " 'index': 
335,\n", " 'word': '▁We',\n", " 'start': 1225,\n", " 'end': 1228},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72096175,\n", " 'index': 336,\n", " 'word': '▁decide',\n", " 'start': 1228,\n", " 'end': 1235},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69636214,\n", " 'index': 337,\n", " 'word': 'd',\n", " 'start': 1235,\n", " 'end': 1236},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7201302,\n", " 'index': 338,\n", " 'word': '▁to',\n", " 'start': 1236,\n", " 'end': 1239},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71916217,\n", " 'index': 339,\n", " 'word': '▁take',\n", " 'start': 1239,\n", " 'end': 1244},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75461584,\n", " 'index': 340,\n", " 'word': '▁',\n", " 'start': 1244,\n", " 'end': 1245},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7655589,\n", " 'index': 341,\n", " 'word': 'another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7383289,\n", " 'index': 342,\n", " 'word': '▁shot',\n", " 'start': 1252,\n", " 'end': 1257},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76632655,\n", " 'index': 343,\n", " 'word': '▁just',\n", " 'start': 1257,\n", " 'end': 1262},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7247141,\n", " 'index': 344,\n", " 'word': '▁to',\n", " 'start': 1262,\n", " 'end': 1265},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73396194,\n", " 'index': 345,\n", " 'word': '▁make',\n", " 'start': 1265,\n", " 'end': 1270},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75854856,\n", " 'index': 346,\n", " 'word': '▁sure',\n", " 'start': 1270,\n", " 'end': 1275},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76170975,\n", " 'index': 347,\n", " 'word': '▁we',\n", " 'start': 1275,\n", " 'end': 1278},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75737375,\n", " 'index': 348,\n", " 'word': '▁were',\n", " 'start': 1278,\n", " 'end': 1283},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7405633,\n", " 'index': 349,\n", " 'word': 'n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " 
{'entity': 'LABEL_1',\n", " 'score': 0.7373094,\n", " 'index': 350,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74561375,\n", " 'index': 351,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7152604,\n", " 'index': 352,\n", " 'word': '▁wrong',\n", " 'start': 1286,\n", " 'end': 1292},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71036845,\n", " 'index': 353,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7824172,\n", " 'index': 354,\n", " 'word': '▁on',\n", " 'start': 1293,\n", " 'end': 1296},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76754534,\n", " 'index': 355,\n", " 'word': '▁April',\n", " 'start': 1296,\n", " 'end': 1302},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7371393,\n", " 'index': 356,\n", " 'word': '▁5,',\n", " 'start': 1302,\n", " 'end': 1305},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6837726,\n", " 'index': 357,\n", " 'word': '▁1998',\n", " 'start': 1305,\n", " 'end': 1310},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7280978,\n", " 'index': 358,\n", " 'word': '.',\n", " 'start': 1310,\n", " 'end': 1311},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7754057,\n", " 'index': 359,\n", " 'word': '▁Michael',\n", " 'start': 1311,\n", " 'end': 1319},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6482514,\n", " 'index': 360,\n", " 'word': '▁Malin',\n", " 'start': 1319,\n", " 'end': 1325},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70006573,\n", " 'index': 361,\n", " 'word': '▁and',\n", " 'start': 1325,\n", " 'end': 1329},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7558913,\n", " 'index': 362,\n", " 'word': '▁his',\n", " 'start': 1329,\n", " 'end': 1333},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7198912,\n", " 'index': 363,\n", " 'word': '▁Mars',\n", " 'start': 1333,\n", " 'end': 1338},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7419942,\n", " 'index': 364,\n", " 
'word': '▁',\n", " 'start': 1338,\n", " 'end': 1339},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72979295,\n", " 'index': 365,\n", " 'word': 'Orbit',\n", " 'start': 1339,\n", " 'end': 1344},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7115055,\n", " 'index': 366,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7362249,\n", " 'index': 367,\n", " 'word': '▁camera',\n", " 'start': 1346,\n", " 'end': 1353},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6770409,\n", " 'index': 368,\n", " 'word': '▁team',\n", " 'start': 1353,\n", " 'end': 1358},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72247416,\n", " 'index': 369,\n", " 'word': '▁',\n", " 'start': 1358,\n", " 'end': 1359},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7055674,\n", " 'index': 370,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73880804,\n", " 'index': 371,\n", " 'word': '▁',\n", " 'start': 1363,\n", " 'end': 1364},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71011674,\n", " 'index': 372,\n", " 'word': 'a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6853782,\n", " 'index': 373,\n", " 'word': '▁picture',\n", " 'start': 1365,\n", " 'end': 1373},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7360565,\n", " 'index': 374,\n", " 'word': '▁that',\n", " 'start': 1373,\n", " 'end': 1378},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7494284,\n", " 'index': 375,\n", " 'word': '▁was',\n", " 'start': 1378,\n", " 'end': 1382},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7195536,\n", " 'index': 376,\n", " 'word': '▁ten',\n", " 'start': 1382,\n", " 'end': 1386},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75279045,\n", " 'index': 377,\n", " 'word': '▁times',\n", " 'start': 1386,\n", " 'end': 1392},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7426931,\n", " 'index': 378,\n", " 'word': '▁',\n", " 'start': 1392,\n", " 'end': 1393},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.72122455,\n", " 'index': 379,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7237729,\n", " 'index': 380,\n", " 'word': 'er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76766646,\n", " 'index': 381,\n", " 'word': '▁than',\n", " 'start': 1400,\n", " 'end': 1405},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77287704,\n", " 'index': 382,\n", " 'word': '▁the',\n", " 'start': 1405,\n", " 'end': 1409},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77964455,\n", " 'index': 383,\n", " 'word': '▁original',\n", " 'start': 1409,\n", " 'end': 1418},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7435452,\n", " 'index': 384,\n", " 'word': '▁Viking',\n", " 'start': 1418,\n", " 'end': 1425},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7510399,\n", " 'index': 385,\n", " 'word': '▁photos',\n", " 'start': 1425,\n", " 'end': 1432},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71882915,\n", " 'index': 386,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7779149,\n", " 'index': 387,\n", " 'word': '▁',\n", " 'start': 1433,\n", " 'end': 1434},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7555608,\n", " 'index': 388,\n", " 'word': 'reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75253564,\n", " 'index': 389,\n", " 'word': 'ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76151603,\n", " 'index': 390,\n", " 'word': '▁',\n", " 'start': 1443,\n", " 'end': 1444},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74064636,\n", " 'index': 391,\n", " 'word': 'a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75289667,\n", " 'index': 392,\n", " 'word': '▁natural',\n", " 'start': 1445,\n", " 'end': 1453},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7026849,\n", " 'index': 393,\n", " 
'word': '▁land',\n", " 'start': 1453,\n", " 'end': 1458},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71034503,\n", " 'index': 394,\n", " 'word': 'form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7105492,\n", " 'index': 395,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73492867,\n", " 'index': 396,\n", " 'word': '▁',\n", " 'start': 1463,\n", " 'end': 1464},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73296964,\n", " 'index': 397,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.749125,\n", " 'index': 398,\n", " 'word': '▁me',\n", " 'start': 1469,\n", " 'end': 1472},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7666059,\n", " 'index': 399,\n", " 'word': 'ant',\n", " 'start': 1472,\n", " 'end': 1475},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78626037,\n", " 'index': 400,\n", " 'word': '▁no',\n", " 'start': 1475,\n", " 'end': 1478},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7852426,\n", " 'index': 401,\n", " 'word': '▁alien',\n", " 'start': 1478,\n", " 'end': 1484},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7328261,\n", " 'index': 402,\n", " 'word': '▁monument',\n", " 'start': 1484,\n", " 'end': 1493},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75887156,\n", " 'index': 403,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7391275,\n", " 'index': 404,\n", " 'word': '▁\"',\n", " 'start': 1494,\n", " 'end': 1496},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7504874,\n", " 'index': 405,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7821028,\n", " 'index': 406,\n", " 'word': '▁that',\n", " 'start': 1499,\n", " 'end': 1504},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72317517,\n", " 'index': 407,\n", " 'word': '▁picture',\n", " 'start': 1504,\n", " 'end': 1512},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.760265,\n", " 'index': 408,\n", " 'word': '▁was',\n", " 'start': 1512,\n", " 'end': 1516},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7557999,\n", " 'index': 409,\n", " 'word': 'n',\n", " 'start': 1516,\n", " 'end': 1517},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7516783,\n", " 'index': 410,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.767745,\n", " 'index': 411,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70629925,\n", " 'index': 412,\n", " 'word': '▁',\n", " 'start': 1519,\n", " 'end': 1520},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7736552,\n", " 'index': 413,\n", " 'word': 'very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7615671,\n", " 'index': 414,\n", " 'word': '▁clear',\n", " 'start': 1524,\n", " 'end': 1530},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7786598,\n", " 'index': 415,\n", " 'word': '▁at',\n", " 'start': 1530,\n", " 'end': 1533},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76966304,\n", " 'index': 416,\n", " 'word': '▁all',\n", " 'start': 1533,\n", " 'end': 1537},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7294976,\n", " 'index': 417,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7374973,\n", " 'index': 418,\n", " 'word': '▁',\n", " 'start': 1538,\n", " 'end': 1539},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7297504,\n", " 'index': 419,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7293893,\n", " 'index': 420,\n", " 'word': '▁',\n", " 'start': 1544,\n", " 'end': 1545},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7144947,\n", " 'index': 421,\n", " 'word': 'could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7338257,\n", " 'index': 422,\n", " 'word': '▁mean',\n", " 'start': 
1550,\n", " 'end': 1555},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76099664,\n", " 'index': 423,\n", " 'word': '▁alien',\n", " 'start': 1555,\n", " 'end': 1561},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7559823,\n", " 'index': 424,\n", " 'word': '▁',\n", " 'start': 1561,\n", " 'end': 1562},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7338846,\n", " 'index': 425,\n", " 'word': 'marking',\n", " 'start': 1562,\n", " 'end': 1569},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7261454,\n", " 'index': 426,\n", " 'word': 's',\n", " 'start': 1569,\n", " 'end': 1570},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74386823,\n", " 'index': 427,\n", " 'word': '▁were',\n", " 'start': 1570,\n", " 'end': 1575},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7308139,\n", " 'index': 428,\n", " 'word': '▁',\n", " 'start': 1575,\n", " 'end': 1576},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7243498,\n", " 'index': 429,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7429684,\n", " 'index': 430,\n", " 'word': '▁by',\n", " 'start': 1582,\n", " 'end': 1585},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72541994,\n", " 'index': 431,\n", " 'word': '▁ha',\n", " 'start': 1585,\n", " 'end': 1588},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6984805,\n", " 'index': 432,\n", " 'word': 'ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73156893,\n", " 'index': 433,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.82595235,\n", " 'index': 434,\n", " 'word': '▁Well',\n", " 'start': 1591,\n", " 'end': 1596},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78789514,\n", " 'index': 435,\n", " 'word': '▁no',\n", " 'start': 1596,\n", " 'end': 1599},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7215853,\n", " 'index': 436,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7419579,\n", " 
'index': 437,\n", " 'word': '▁yes',\n", " 'start': 1600,\n", " 'end': 1604},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7984193,\n", " 'index': 438,\n", " 'word': '▁that',\n", " 'start': 1604,\n", " 'end': 1609},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7603877,\n", " 'index': 439,\n", " 'word': '▁',\n", " 'start': 1609,\n", " 'end': 1610},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7185078,\n", " 'index': 440,\n", " 'word': 'rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72666425,\n", " 'index': 441,\n", " 'word': '▁',\n", " 'start': 1615,\n", " 'end': 1616},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7199019,\n", " 'index': 442,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6944407,\n", " 'index': 443,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7404226,\n", " 'index': 444,\n", " 'word': '▁but',\n", " 'start': 1624,\n", " 'end': 1628},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.782483,\n", " 'index': 445,\n", " 'word': '▁to',\n", " 'start': 1628,\n", " 'end': 1631},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.728395,\n", " 'index': 446,\n", " 'word': '▁prove',\n", " 'start': 1631,\n", " 'end': 1637},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7058425,\n", " 'index': 447,\n", " 'word': '▁them',\n", " 'start': 1637,\n", " 'end': 1642},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71794623,\n", " 'index': 448,\n", " 'word': '▁wrong',\n", " 'start': 1642,\n", " 'end': 1648},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7216178,\n", " 'index': 449,\n", " 'word': '▁on',\n", " 'start': 1648,\n", " 'end': 1651},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7217002,\n", " 'index': 450,\n", " 'word': '▁April',\n", " 'start': 1651,\n", " 'end': 1657},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69683576,\n", " 'index': 451,\n", " 'word': '▁8,',\n", " 'start': 1657,\n", " 'end': 1660},\n", 
" {'entity': 'LABEL_1',\n", " 'score': 0.6255177,\n", " 'index': 452,\n", " 'word': '▁2001',\n", " 'start': 1660,\n", " 'end': 1665},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74123895,\n", " 'index': 453,\n", " 'word': '▁we',\n", " 'start': 1665,\n", " 'end': 1668},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7217462,\n", " 'index': 454,\n", " 'word': '▁decide',\n", " 'start': 1668,\n", " 'end': 1675},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68256843,\n", " 'index': 455,\n", " 'word': 'd',\n", " 'start': 1675,\n", " 'end': 1676},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69462407,\n", " 'index': 456,\n", " 'word': '▁to',\n", " 'start': 1676,\n", " 'end': 1679},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7120118,\n", " 'index': 457,\n", " 'word': '▁take',\n", " 'start': 1679,\n", " 'end': 1684},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.727404,\n", " 'index': 458,\n", " 'word': '▁',\n", " 'start': 1684,\n", " 'end': 1685},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7168176,\n", " 'index': 459,\n", " 'word': 'another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6914268,\n", " 'index': 460,\n", " 'word': '▁picture',\n", " 'start': 1692,\n", " 'end': 1700},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65116674,\n", " 'index': 461,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7633309,\n", " 'index': 462,\n", " 'word': '▁',\n", " 'start': 1701,\n", " 'end': 1702},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7356425,\n", " 'index': 463,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7292783,\n", " 'index': 464,\n", " 'word': '▁sure',\n", " 'start': 1708,\n", " 'end': 1713},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7169685,\n", " 'index': 465,\n", " 'word': '▁it',\n", " 'start': 1713,\n", " 'end': 1716},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7440418,\n", " 'index': 466,\n", " 
'word': '▁was',\n", " 'start': 1716,\n", " 'end': 1720},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69143945,\n", " 'index': 467,\n", " 'word': '▁',\n", " 'start': 1720,\n", " 'end': 1721},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7173495,\n", " 'index': 468,\n", " 'word': 'a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68509066,\n", " 'index': 469,\n", " 'word': '▁cloud',\n", " 'start': 1722,\n", " 'end': 1728},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66747963,\n", " 'index': 470,\n", " 'word': 'less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70866424,\n", " 'index': 471,\n", " 'word': '▁summer',\n", " 'start': 1732,\n", " 'end': 1739},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6921813,\n", " 'index': 472,\n", " 'word': '▁day',\n", " 'start': 1739,\n", " 'end': 1743},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71640414,\n", " 'index': 473,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.739546,\n", " 'index': 474,\n", " 'word': '▁Malin',\n", " 'start': 1744,\n", " 'end': 1750},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7273382,\n", " 'index': 475,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7459867,\n", " 'index': 476,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73050207,\n", " 'index': 477,\n", " 'word': '▁team',\n", " 'start': 1752,\n", " 'end': 1757},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.735162,\n", " 'index': 478,\n", " 'word': '▁captur',\n", " 'start': 1757,\n", " 'end': 1764},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7368789,\n", " 'index': 479,\n", " 'word': 'ed',\n", " 'start': 1764,\n", " 'end': 1766},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7655242,\n", " 'index': 480,\n", " 'word': '▁an',\n", " 'start': 1766,\n", " 'end': 1769},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.7452704,\n", " 'index': 481,\n", " 'word': '▁amazing',\n", " 'start': 1769,\n", " 'end': 1777},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71074563,\n", " 'index': 482,\n", " 'word': '▁photo',\n", " 'start': 1777,\n", " 'end': 1783},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7648886,\n", " 'index': 483,\n", " 'word': '▁',\n", " 'start': 1783,\n", " 'end': 1784},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7509412,\n", " 'index': 484,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74002814,\n", " 'index': 485,\n", " 'word': '▁the',\n", " 'start': 1789,\n", " 'end': 1793},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7145964,\n", " 'index': 486,\n", " 'word': '▁camera',\n", " 'start': 1793,\n", " 'end': 1800},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69679904,\n", " 'index': 487,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70451593,\n", " 'index': 488,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71261543,\n", " 'index': 489,\n", " 'word': '▁',\n", " 'start': 1802,\n", " 'end': 1803},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71001965,\n", " 'index': 490,\n", " 'word': 'absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71867234,\n", " 'index': 491,\n", " 'word': '▁',\n", " 'start': 1811,\n", " 'end': 1812},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7250019,\n", " 'index': 492,\n", " 'word': 'maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71001124,\n", " 'index': 493,\n", " 'word': '▁revolution',\n", " 'start': 1819,\n", " 'end': 1830},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6948604,\n", " 'index': 494,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8154096,\n", " 'index': 495,\n", " 
'word': '▁With',\n", " 'start': 1831,\n", " 'end': 1836},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76373816,\n", " 'index': 496,\n", " 'word': '▁this',\n", " 'start': 1836,\n", " 'end': 1841},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7451351,\n", " 'index': 497,\n", " 'word': '▁camera',\n", " 'start': 1841,\n", " 'end': 1848},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.82403296,\n", " 'index': 498,\n", " 'word': '▁you',\n", " 'start': 1848,\n", " 'end': 1852},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77782404,\n", " 'index': 499,\n", " 'word': '▁can',\n", " 'start': 1852,\n", " 'end': 1856},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77488196,\n", " 'index': 500,\n", " 'word': '▁disc',\n", " 'start': 1856,\n", " 'end': 1861},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7763104,\n", " 'index': 501,\n", " 'word': 'er',\n", " 'start': 1861,\n", " 'end': 1863},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78899276,\n", " 'index': 502,\n", " 'word': 'n',\n", " 'start': 1863,\n", " 'end': 1864},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7839318,\n", " 'index': 503,\n", " 'word': '▁things',\n", " 'start': 1864,\n", " 'end': 1871},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7864886,\n", " 'index': 504,\n", " 'word': '▁in',\n", " 'start': 1871,\n", " 'end': 1874},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7598064,\n", " 'index': 505,\n", " 'word': '▁',\n", " 'start': 1874,\n", " 'end': 1875},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76907665,\n", " 'index': 506,\n", " 'word': 'a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.771052,\n", " 'index': 507,\n", " 'word': '▁digital',\n", " 'start': 1876,\n", " 'end': 1884},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75097096,\n", " 'index': 508,\n", " 'word': '▁image',\n", " 'start': 1884,\n", " 'end': 1890},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72820973,\n", " 'index': 509,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.703228,\n", " 'index': 510,\n", " 'word': '▁3',\n", " 'start': 1891,\n", " 'end': 1893},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74737346,\n", " 'index': 511,\n", " 'word': '▁times',\n", " 'start': 1893,\n", " 'end': 1899},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74640083,\n", " 'index': 512,\n", " 'word': '▁b',\n", " 'start': 1899,\n", " 'end': 1901},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73074967,\n", " 'index': 513,\n", " 'word': 'igger',\n", " 'start': 1901,\n", " 'end': 1906},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74877274,\n", " 'index': 514,\n", " 'word': '▁than',\n", " 'start': 1906,\n", " 'end': 1911},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7517363,\n", " 'index': 515,\n", " 'word': '▁the',\n", " 'start': 1911,\n", " 'end': 1915},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71109414,\n", " 'index': 516,\n", " 'word': '▁pixel',\n", " 'start': 1915,\n", " 'end': 1921},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7338081,\n", " 'index': 517,\n", " 'word': '▁size',\n", " 'start': 1921,\n", " 'end': 1926},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8042635,\n", " 'index': 518,\n", " 'word': '▁',\n", " 'start': 1926,\n", " 'end': 1927},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74277866,\n", " 'index': 519,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.765812,\n", " 'index': 520,\n", " 'word': '▁',\n", " 'start': 1932,\n", " 'end': 1933},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73706234,\n", " 'index': 521,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7790774,\n", " 'index': 522,\n", " 'word': '▁if',\n", " 'start': 1938,\n", " 'end': 1941},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7646758,\n", " 'index': 523,\n", " 'word': '▁there',\n", " 'start': 1941,\n", " 'end': 1947},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7686542,\n", " 'index': 524,\n", " 'word': 
'▁were',\n", " 'start': 1947,\n", " 'end': 1952},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77635634,\n", " 'index': 525,\n", " 'word': '▁any',\n", " 'start': 1952,\n", " 'end': 1956},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77535146,\n", " 'index': 526,\n", " 'word': '▁',\n", " 'start': 1956,\n", " 'end': 1957},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7573227,\n", " 'index': 527,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74084586,\n", " 'index': 528,\n", " 'word': '▁of',\n", " 'start': 1962,\n", " 'end': 1965},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7274466,\n", " 'index': 529,\n", " 'word': '▁life',\n", " 'start': 1965,\n", " 'end': 1970},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7055662,\n", " 'index': 530,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74733704,\n", " 'index': 531,\n", " 'word': '▁you',\n", " 'start': 1971,\n", " 'end': 1975},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74577606,\n", " 'index': 532,\n", " 'word': '▁',\n", " 'start': 1975,\n", " 'end': 1976},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74793524,\n", " 'index': 533,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7183822,\n", " 'index': 534,\n", " 'word': '▁',\n", " 'start': 1981,\n", " 'end': 1982},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7445473,\n", " 'index': 535,\n", " 'word': 'e',\n", " 'start': 1982,\n", " 'end': 1983},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7600949,\n", " 'index': 536,\n", " 'word': 'asily',\n", " 'start': 1983,\n", " 'end': 1988},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7463867,\n", " 'index': 537,\n", " 'word': '▁see',\n", " 'start': 1988,\n", " 'end': 1992},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7444904,\n", " 'index': 538,\n", " 'word': '▁what',\n", " 'start': 1992,\n", " 'end': 1997},\n", " {'entity': 'LABEL_1',\n", " 
'score': 0.7586323,\n", " 'index': 539,\n", " 'word': '▁the',\n", " 'start': 1997,\n", " 'end': 2001},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75233936,\n", " 'index': 540,\n", " 'word': 'y',\n", " 'start': 2001,\n", " 'end': 2002},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7635048,\n", " 'index': 541,\n", " 'word': '▁were',\n", " 'start': 2002,\n", " 'end': 2007},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71720314,\n", " 'index': 542,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7992655,\n", " 'index': 543,\n", " 'word': '▁What',\n", " 'start': 2008,\n", " 'end': 2013},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72489667,\n", " 'index': 544,\n", " 'word': '▁the',\n", " 'start': 2013,\n", " 'end': 2017},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65808624,\n", " 'index': 545,\n", " 'word': '▁picture',\n", " 'start': 2017,\n", " 'end': 2025},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69231373,\n", " 'index': 546,\n", " 'word': '▁show',\n", " 'start': 2025,\n", " 'end': 2030},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6862266,\n", " 'index': 547,\n", " 'word': 'ed',\n", " 'start': 2030,\n", " 'end': 2032},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7581496,\n", " 'index': 548,\n", " 'word': '▁was',\n", " 'start': 2032,\n", " 'end': 2036},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7601303,\n", " 'index': 549,\n", " 'word': '▁the',\n", " 'start': 2036,\n", " 'end': 2040},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72894543,\n", " 'index': 550,\n", " 'word': '▁but',\n", " 'start': 2040,\n", " 'end': 2044},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6921015,\n", " 'index': 551,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72469836,\n", " 'index': 552,\n", " 'word': '▁or',\n", " 'start': 2046,\n", " 'end': 2049},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68593043,\n", " 'index': 553,\n", " 'word': '▁mesa',\n", " 'start': 
2049,\n", " 'end': 2054},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69867843,\n", " 'index': 554,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71468014,\n", " 'index': 555,\n", " 'word': '▁',\n", " 'start': 2055,\n", " 'end': 2056},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.75708663,\n", " 'index': 556,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74836534,\n", " 'index': 557,\n", " 'word': '▁are',\n", " 'start': 2061,\n", " 'end': 2065},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73992217,\n", " 'index': 558,\n", " 'word': '▁land',\n", " 'start': 2065,\n", " 'end': 2070},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72869176,\n", " 'index': 559,\n", " 'word': 'form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7219306,\n", " 'index': 560,\n", " 'word': 's',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7646589,\n", " 'index': 561,\n", " 'word': '▁common',\n", " 'start': 2075,\n", " 'end': 2082},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78755254,\n", " 'index': 562,\n", " 'word': '▁around',\n", " 'start': 2082,\n", " 'end': 2089},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.81112164,\n", " 'index': 563,\n", " 'word': '▁the',\n", " 'start': 2089,\n", " 'end': 2093},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.764886,\n", " 'index': 564,\n", " 'word': '▁American',\n", " 'start': 2093,\n", " 'end': 2102},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7145308,\n", " 'index': 565,\n", " 'word': '▁West',\n", " 'start': 2102,\n", " 'end': 2107},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6772878,\n", " 'index': 566,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from 
transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"HiTZ/mdeberta-expl-extraction-multi\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"HiTZ/mdeberta-expl-extraction-multi\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "markdown", "id": "6d68cccb-bc9e-415c-babb-3dd2b2ff72cb", "metadata": {}, "source": [ "## 47 rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation" ] }, { "cell_type": "code", "execution_count": 101, "id": "34fc2413-4e56-430f-bb99-16869761b1c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'B-LOC',\n", " 'score': 0.9935009,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9510317,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'I-LOC',\n", " 'score': 0.96693367,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'B-LOC',\n", " 'score': 0.7514709,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'B-DAT',\n", " 'score': 0.9943066,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'I-DAT',\n", " 'score': 0.99545693,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'I-DAT',\n", " 'score': 0.9937779,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'I-DAT',\n", " 'score': 0.99656725,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': 'B-DAT',\n", " 'score': 0.9939989,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'I-DAT',\n", " 'score': 0.99549806,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 
1659},\n", " {'entity': 'I-DAT',\n", " 'score': 0.994966,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'I-DAT',\n", " 'score': 0.99757344,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'B-LOC',\n", " 'score': 0.77366734,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'I-LOC',\n", " 'score': 0.5598569,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107}]" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 102, "id": "f5ae9b8f-c5ff-40ba-9956-1aa511d14d6c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-DAT 2\n", "B-LOC 3\n", "I-DAT 6\n", "I-LOC 3\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-DAT ▁April 2\n", "B-LOC ▁American 1\n", " ▁Cy 1\n", " ▁Mars 1\n", "I-DAT , 2\n", " ▁1998. 
1\n", " ▁2001 1\n", " ▁5 1\n", " ▁8 1\n", "I-LOC do 1\n", " nia 1\n", " ▁West 1\n", "dtype: int64" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "with open(\"47 rollerhafeezh-amikomxlm-roberta-base-ner-augmentation.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "1eebdd71-4bb0-4c49-af4d-10343c9e273e", "metadata": {}, "source": [ "## 48 brettlin/distilbert-base-uncased-finetuned-ner" ] }, { "cell_type": "code", "execution_count": 104, "id": "39770965-3574-47f2-ad55-44bb35f11864", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'LABEL_0',\n", " 'score': 0.9996531,\n", " 'index': 1,\n", " 'word': 'so',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995907,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99943215,\n", " 'index': 3,\n", " 'word': 'if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986737,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979754,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.995782,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98806703,\n", " 'index': 7,\n", " 'word': 'a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.60142565,\n", " 'index': 8,\n", " 'word': 'nasa',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'LABEL_75',\n", " 'score': 0.74799323,\n", " 'index': 9,\n", " 'word': 'scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.99963987,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99952126,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999762,\n", " 'index': 12,\n", " 'word': 'should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995994,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995302,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99958926,\n", " 'index': 15,\n", " 'word': 'to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99921143,\n", " 'index': 16,\n", " 'word': 'tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99929273,\n", " 'index': 17,\n", " 'word': 'me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99949706,\n", " 'index': 18,\n", " 'word': 'the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99955744,\n", " 'index': 19,\n", " 'word': 'whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99942434,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99973875,\n", " 'index': 21,\n", " 'word': 'about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998621,\n", " 'index': 22,\n", " 'word': 'the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997615,\n", " 'index': 23,\n", " 'word': 'face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995735,\n", " 'index': 24,\n", " 'word': 'on',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98947805,\n", " 
'index': 25,\n", " 'word': 'mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99989235,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999428,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999577,\n", " 'index': 28,\n", " 'word': 'obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999269,\n", " 'index': 29,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999874,\n", " 'index': 30,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998374,\n", " 'index': 31,\n", " 'word': 'that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997681,\n", " 'index': 32,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99981254,\n", " 'index': 33,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99965465,\n", " 'index': 34,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990865,\n", " 'index': 35,\n", " 'word': 'on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97573996,\n", " 'index': 36,\n", " 'word': 'mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998772,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999892,\n", " 'index': 38,\n", " 'word': 'and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999051,\n", " 'index': 39,\n", " 'word': 'that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.99995136,\n", " 'index': 40,\n", " 'word': 'the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99992394,\n", " 'index': 41,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999279,\n", " 'index': 42,\n", " 'word': 'was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999905,\n", " 'index': 43,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991283,\n", " 'index': 44,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.89696616,\n", " 'index': 45,\n", " 'word': 'aliens',\n", " 'start': 191,\n", " 'end': 197},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99989593,\n", " 'index': 46,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995895,\n", " 'index': 47,\n", " 'word': 'correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99978584,\n", " 'index': 48,\n", " 'word': '?',\n", " 'start': 206,\n", " 'end': 207},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99943024,\n", " 'index': 49,\n", " 'word': '\"',\n", " 'start': 207,\n", " 'end': 208},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99595106,\n", " 'index': 50,\n", " 'word': 'no',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99868613,\n", " 'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9463645,\n", " 'index': 52,\n", " 'word': 'twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.76300526,\n", " 'index': 53,\n", " 'word': 'five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9725427,\n", " 'index': 54,\n", " 'word': 'years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.99003905,\n", " 'index': 55,\n", " 'word': 'ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984659,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8449111,\n", " 'index': 57,\n", " 'word': 'our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.54668593,\n", " 'index': 58,\n", " 'word': 'viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.71119046,\n", " 'index': 59,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9257255,\n", " 'index': 60,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979443,\n", " 'index': 61,\n", " 'word': 'was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99194145,\n", " 'index': 62,\n", " 'word': 'circling',\n", " 'start': 264,\n", " 'end': 272},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9151495,\n", " 'index': 63,\n", " 'word': 'the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9498145,\n", " 'index': 64,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9979522,\n", " 'index': 65,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9976399,\n", " 'index': 66,\n", " 'word': 'snapping',\n", " 'start': 285,\n", " 'end': 293},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9884976,\n", " 'index': 67,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99981576,\n", " 'index': 68,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99983835,\n", " 'index': 69,\n", " 'word': 'when',\n", " 'start': 302,\n", " 'end': 
306},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996313,\n", " 'index': 70,\n", " 'word': 'it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99968636,\n", " 'index': 71,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996791,\n", " 'index': 72,\n", " 'word': 'the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99974054,\n", " 'index': 73,\n", " 'word': 'shadowy',\n", " 'start': 322,\n", " 'end': 329},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99973494,\n", " 'index': 74,\n", " 'word': 'likeness',\n", " 'start': 330,\n", " 'end': 338},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996382,\n", " 'index': 75,\n", " 'word': 'of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9991806,\n", " 'index': 76,\n", " 'word': 'a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9973673,\n", " 'index': 77,\n", " 'word': 'human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.997843,\n", " 'index': 78,\n", " 'word': 'face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99921536,\n", " 'index': 79,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981499,\n", " 'index': 80,\n", " 'word': 'us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99827325,\n", " 'index': 81,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99968076,\n", " 'index': 82,\n", " 'word': 'figured',\n", " 'start': 370,\n", " 'end': 377},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997644,\n", " 'index': 83,\n", " 'word': 'out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99987495,\n", " 'index': 84,\n", " 'word': 'that',\n", " 
'start': 382,\n", " 'end': 386},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998528,\n", " 'index': 85,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998704,\n", " 'index': 86,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998265,\n", " 'index': 87,\n", " 'word': 'just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99902964,\n", " 'index': 88,\n", " 'word': 'another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982116,\n", " 'index': 89,\n", " 'word': 'martian',\n", " 'start': 407,\n", " 'end': 414},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99879277,\n", " 'index': 90,\n", " 'word': 'mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99952435,\n", " 'index': 91,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99871767,\n", " 'index': 92,\n", " 'word': 'common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99144477,\n", " 'index': 93,\n", " 'word': 'around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'LABEL_20',\n", " 'score': 0.7289493,\n", " 'index': 94,\n", " 'word': 'cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'LABEL_20',\n", " 'score': 0.6414867,\n", " 'index': 95,\n", " 'word': '##don',\n", " 'start': 437,\n", " 'end': 440},\n", " {'entity': 'LABEL_20',\n", " 'score': 0.74183536,\n", " 'index': 96,\n", " 'word': '##ia',\n", " 'start': 440,\n", " 'end': 442},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998971,\n", " 'index': 97,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99993026,\n", " 'index': 98,\n", " 'word': 'only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999012,\n", " 'index': 99,\n", " 
'word': 'this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998801,\n", " 'index': 100,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999132,\n", " 'index': 101,\n", " 'word': 'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99989045,\n", " 'index': 102,\n", " 'word': 'shadows',\n", " 'start': 462,\n", " 'end': 469},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997019,\n", " 'index': 103,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99966216,\n", " 'index': 104,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997868,\n", " 'index': 105,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999015,\n", " 'index': 106,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.993024,\n", " 'index': 107,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8148152,\n", " 'index': 108,\n", " 'word': 'an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'LABEL_73',\n", " 'score': 0.92523,\n", " 'index': 109,\n", " 'word': 'egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'LABEL_74',\n", " 'score': 0.6341769,\n", " 'index': 110,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'LABEL_75',\n", " 'score': 0.9717007,\n", " 'index': 111,\n", " 'word': 'pharaoh',\n", " 'start': 505,\n", " 'end': 512},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.79483175,\n", " 'index': 112,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997398,\n", " 'index': 113,\n", " 'word': 'very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.9993025,\n", " 'index': 114,\n", " 'word': 'few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99925786,\n", " 'index': 115,\n", " 'word': 'days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99947757,\n", " 'index': 116,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99968624,\n", " 'index': 117,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99973804,\n", " 'index': 118,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99975663,\n", " 'index': 119,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99991214,\n", " 'index': 120,\n", " 'word': 'the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99990284,\n", " 'index': 121,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.997297,\n", " 'index': 122,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9489387,\n", " 'index': 123,\n", " 'word': 'all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.90861577,\n", " 'index': 124,\n", " 'word': 'to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99558485,\n", " 'index': 125,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99955446,\n", " 'index': 126,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995072,\n", " 'index': 127,\n", " 'word': 'and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996469,\n", " 'index': 128,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.9996728,\n", " 'index': 129,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99978834,\n", " 'index': 130,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996469,\n", " 'index': 131,\n", " 'word': 'to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998093,\n", " 'index': 132,\n", " 'word': 'note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99986935,\n", " 'index': 133,\n", " 'word': 'that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996062,\n", " 'index': 134,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997197,\n", " 'index': 135,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986713,\n", " 'index': 136,\n", " 'word': 'a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99810624,\n", " 'index': 137,\n", " 'word': 'huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986929,\n", " 'index': 138,\n", " 'word': 'rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9953111,\n", " 'index': 139,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994229,\n", " 'index': 140,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998062,\n", " 'index': 141,\n", " 'word': 'just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986547,\n", " 'index': 142,\n", " 'word': 'resembled',\n", " 'start': 642,\n", " 'end': 651},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9972566,\n", " 'index': 143,\n", " 'word': 'a',\n", " 'start': 652,\n", " 
'end': 653},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99356645,\n", " 'index': 144,\n", " 'word': 'human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99711275,\n", " 'index': 145,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98437935,\n", " 'index': 146,\n", " 'word': 'and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9968284,\n", " 'index': 147,\n", " 'word': 'face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99973816,\n", " 'index': 148,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998491,\n", " 'index': 149,\n", " 'word': 'but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999168,\n", " 'index': 150,\n", " 'word': 'all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99989974,\n", " 'index': 151,\n", " 'word': 'of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998741,\n", " 'index': 152,\n", " 'word': 'it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999095,\n", " 'index': 153,\n", " 'word': 'was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998404,\n", " 'index': 154,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99984217,\n", " 'index': 155,\n", " 'word': 'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990085,\n", " 'index': 156,\n", " 'word': 'shadows',\n", " 'start': 703,\n", " 'end': 710},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99881077,\n", " 'index': 157,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996306,\n", " 'index': 158,\n", " 'word': 'we',\n", " 
'start': 712,\n", " 'end': 714},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99975604,\n", " 'index': 159,\n", " 'word': 'only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996767,\n", " 'index': 160,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999858,\n", " 'index': 161,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996333,\n", " 'index': 162,\n", " 'word': 'because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987091,\n", " 'index': 163,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9981231,\n", " 'index': 164,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9989918,\n", " 'index': 165,\n", " 'word': 'it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983368,\n", " 'index': 166,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99723864,\n", " 'index': 167,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9965023,\n", " 'index': 168,\n", " 'word': 'a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9949131,\n", " 'index': 169,\n", " 'word': 'good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99544805,\n", " 'index': 170,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9924279,\n", " 'index': 171,\n", " 'word': 'to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9778621,\n", " 'index': 172,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9591297,\n", " 'index': 
173,\n", " 'word': 'the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9785558,\n", " 'index': 174,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9795846,\n", " 'index': 175,\n", " 'word': 'with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99311686,\n", " 'index': 176,\n", " 'word': 'nasa',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99412537,\n", " 'index': 177,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9853887,\n", " 'index': 178,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98601574,\n", " 'index': 179,\n", " 'word': 'findings',\n", " 'start': 808,\n", " 'end': 816},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99843234,\n", " 'index': 180,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9949004,\n", " 'index': 181,\n", " 'word': 'and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.990635,\n", " 'index': 182,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98665386,\n", " 'index': 183,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99445266,\n", " 'index': 184,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9913902,\n", " 'index': 185,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99121255,\n", " 'index': 186,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982174,\n", " 'index': 187,\n", " 'word': 'mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'LABEL_0',\n", " 
'score': 0.9925493,\n", " 'index': 188,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9925673,\n", " 'index': 189,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99865615,\n", " 'index': 190,\n", " 'word': 'and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99926656,\n", " 'index': 191,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9956815,\n", " 'index': 192,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9459845,\n", " 'index': 193,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999012,\n", " 'index': 194,\n", " 'word': 'the',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998677,\n", " 'index': 195,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99969125,\n", " 'index': 196,\n", " 'word': 'on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99862087,\n", " 'index': 197,\n", " 'word': 'mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99946374,\n", " 'index': 198,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99820757,\n", " 'index': 199,\n", " 'word': 'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9943071,\n", " 'index': 200,\n", " 'word': 'a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9949462,\n", " 'index': 201,\n", " 'word': 'pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.996988,\n", " 'index': 202,\n", " 'word': 'icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.99945325,\n", " 'index': 203,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9961422,\n", " 'index': 204,\n", " 'word': 'shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9605378,\n", " 'index': 205,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9329523,\n", " 'index': 206,\n", " 'word': 'movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99609864,\n", " 'index': 207,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9907041,\n", " 'index': 208,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.95241463,\n", " 'index': 209,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.86189944,\n", " 'index': 210,\n", " 'word': 'books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8810842,\n", " 'index': 211,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7707626,\n", " 'index': 212,\n", " 'word': 'magazines',\n", " 'start': 939,\n", " 'end': 948},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9275165,\n", " 'index': 213,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'LABEL_73',\n", " 'score': 0.4515065,\n", " 'index': 214,\n", " 'word': 'radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'LABEL_74',\n", " 'score': 0.871169,\n", " 'index': 215,\n", " 'word': 'talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'LABEL_75',\n", " 'score': 0.7055705,\n", " 'index': 216,\n", " 'word': 'shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.95585626,\n", " 'index': 217,\n", " 'word': ',',\n", " 'start': 966,\n", " 
'end': 967},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.76473933,\n", " 'index': 218,\n", " 'word': 'and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.3433743,\n", " 'index': 219,\n", " 'word': 'haunted',\n", " 'start': 972,\n", " 'end': 979},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.6025852,\n", " 'index': 220,\n", " 'word': 'grocery',\n", " 'start': 980,\n", " 'end': 987},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.50706923,\n", " 'index': 221,\n", " 'word': 'store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.37388822,\n", " 'index': 222,\n", " 'word': 'check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.5225139,\n", " 'index': 223,\n", " 'word': '##out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'LABEL_75',\n", " 'score': 0.5667381,\n", " 'index': 224,\n", " 'word': 'lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9266954,\n", " 'index': 225,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9151958,\n", " 'index': 226,\n", " 'word': '25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.93549967,\n", " 'index': 227,\n", " 'word': 'years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8062772,\n", " 'index': 228,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9965282,\n", " 'index': 229,\n", " 'word': 'some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99480104,\n", " 'index': 230,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9963915,\n", " 'index': 231,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99992347,\n", 
" 'index': 232,\n", " 'word': 'the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99991715,\n", " 'index': 233,\n", " 'word': 'natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99994695,\n", " 'index': 234,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99992645,\n", " 'index': 235,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99985206,\n", " 'index': 236,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99985313,\n", " 'index': 237,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997029,\n", " 'index': 238,\n", " 'word': 'of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996062,\n", " 'index': 239,\n", " 'word': 'life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99725705,\n", " 'index': 240,\n", " 'word': 'on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.94723123,\n", " 'index': 241,\n", " 'word': 'mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994099,\n", " 'index': 242,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.997752,\n", " 'index': 243,\n", " 'word': 'and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99548113,\n", " 'index': 244,\n", " 'word': 'that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9863447,\n", " 'index': 245,\n", " 'word': 'us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9810527,\n", " 'index': 246,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 
1116},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99548376,\n", " 'index': 247,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.996734,\n", " 'index': 248,\n", " 'word': 'to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9977385,\n", " 'index': 249,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983668,\n", " 'index': 250,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99666613,\n", " 'index': 251,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986473,\n", " 'index': 252,\n", " 'word': 'but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9985128,\n", " 'index': 253,\n", " 'word': 'really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99492836,\n", " 'index': 254,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8159351,\n", " 'index': 255,\n", " 'word': 'the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.65608,\n", " 'index': 256,\n", " 'word': 'defenders',\n", " 'start': 1152,\n", " 'end': 1161},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5334063,\n", " 'index': 257,\n", " 'word': 'of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.36823764,\n", " 'index': 258,\n", " 'word': 'the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.7509814,\n", " 'index': 259,\n", " 'word': 'nasa',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.6963956,\n", " 'index': 260,\n", " 'word': 'budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.85757244,\n", " 'index': 
261,\n", " 'word': 'wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98319745,\n", " 'index': 262,\n", " 'word': 'there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9912368,\n", " 'index': 263,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99493057,\n", " 'index': 264,\n", " 'word': 'ancient',\n", " 'start': 1196,\n", " 'end': 1203},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98931223,\n", " 'index': 265,\n", " 'word': 'civilization',\n", " 'start': 1204,\n", " 'end': 1216},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98382854,\n", " 'index': 266,\n", " 'word': 'on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96409446,\n", " 'index': 267,\n", " 'word': 'mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99208647,\n", " 'index': 268,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994597,\n", " 'index': 269,\n", " 'word': 'we',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99947494,\n", " 'index': 270,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994407,\n", " 'index': 271,\n", " 'word': 'to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9995198,\n", " 'index': 272,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999316,\n", " 'index': 273,\n", " 'word': 'another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993339,\n", " 'index': 274,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993856,\n", " 'index': 275,\n", " 'word': 'just',\n", " 'start': 1258,\n", " 'end': 
1262},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9983365,\n", " 'index': 276,\n", " 'word': 'to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9946636,\n", " 'index': 277,\n", " 'word': 'make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9865796,\n", " 'index': 278,\n", " 'word': 'sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9879188,\n", " 'index': 279,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.93629557,\n", " 'index': 280,\n", " 'word': 'weren',\n", " 'start': 1279,\n", " 'end': 1284},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.81539416,\n", " 'index': 281,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9333922,\n", " 'index': 282,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9301563,\n", " 'index': 283,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9978479,\n", " 'index': 284,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97111917,\n", " 'index': 285,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.9933061,\n", " 'index': 286,\n", " 'word': 'april',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.99215525,\n", " 'index': 287,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7865959,\n", " 'index': 288,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.98310995,\n", " 'index': 289,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.86976135,\n", " 'index': 290,\n", " 
'word': '.',\n", " 'start': 1310,\n", " 'end': 1311},\n", " {'entity': 'LABEL_5',\n", " 'score': 0.70154727,\n", " 'index': 291,\n", " 'word': 'michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'LABEL_7',\n", " 'score': 0.97676635,\n", " 'index': 292,\n", " 'word': 'mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'LABEL_7',\n", " 'score': 0.96749806,\n", " 'index': 293,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.978467,\n", " 'index': 294,\n", " 'word': 'and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8586074,\n", " 'index': 295,\n", " 'word': 'his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'LABEL_14',\n", " 'score': 0.525903,\n", " 'index': 296,\n", " 'word': 'mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'LABEL_74',\n", " 'score': 0.3780676,\n", " 'index': 297,\n", " 'word': 'orbit',\n", " 'start': 1339,\n", " 'end': 1344},\n", " {'entity': 'LABEL_74',\n", " 'score': 0.4745661,\n", " 'index': 298,\n", " 'word': '##er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9404595,\n", " 'index': 299,\n", " 'word': 'camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9960602,\n", " 'index': 300,\n", " 'word': 'team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99968743,\n", " 'index': 301,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99965096,\n", " 'index': 302,\n", " 'word': 'a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99950874,\n", " 'index': 303,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994449,\n", " 'index': 304,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.99869066,\n", " 'index': 305,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.977216,\n", " 'index': 306,\n", " 'word': 'ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99482286,\n", " 'index': 307,\n", " 'word': 'times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.994201,\n", " 'index': 308,\n", " 'word': 'sharpe',\n", " 'start': 1393,\n", " 'end': 1399},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9958853,\n", " 'index': 309,\n", " 'word': '##r',\n", " 'start': 1399,\n", " 'end': 1400},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9956436,\n", " 'index': 310,\n", " 'word': 'than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9973666,\n", " 'index': 311,\n", " 'word': 'the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9975478,\n", " 'index': 312,\n", " 'word': 'original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9565762,\n", " 'index': 313,\n", " 'word': 'viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99581724,\n", " 'index': 314,\n", " 'word': 'photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998056,\n", " 'index': 315,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99983275,\n", " 'index': 316,\n", " 'word': 'revealing',\n", " 'start': 1434,\n", " 'end': 1443},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998454,\n", " 'index': 317,\n", " 'word': 'a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998418,\n", " 'index': 318,\n", " 'word': 'natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99992263,\n", " 'index': 319,\n", " 
'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998939,\n", " 'index': 320,\n", " 'word': '##form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99988616,\n", " 'index': 321,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99994576,\n", " 'index': 322,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998746,\n", " 'index': 323,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999754,\n", " 'index': 324,\n", " 'word': 'no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9965797,\n", " 'index': 325,\n", " 'word': 'alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9921788,\n", " 'index': 326,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99780697,\n", " 'index': 327,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99951005,\n", " 'index': 328,\n", " 'word': '\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996767,\n", " 'index': 329,\n", " 'word': 'but',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9999068,\n", " 'index': 330,\n", " 'word': 'that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99991024,\n", " 'index': 331,\n", " 'word': 'picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99979264,\n", " 'index': 332,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99916065,\n", " 'index': 333,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.99970406,\n", " 'index': 334,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99962485,\n", " 'index': 335,\n", " 'word': 'very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99969125,\n", " 'index': 336,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9984927,\n", " 'index': 337,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9987503,\n", " 'index': 338,\n", " 'word': 'all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999587,\n", " 'index': 339,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99991965,\n", " 'index': 340,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99991965,\n", " 'index': 341,\n", " 'word': 'could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998634,\n", " 'index': 342,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99986553,\n", " 'index': 343,\n", " 'word': 'alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99974054,\n", " 'index': 344,\n", " 'word': 'markings',\n", " 'start': 1562,\n", " 'end': 1570},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996536,\n", " 'index': 345,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99719834,\n", " 'index': 346,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992161,\n", " 'index': 347,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99676174,\n", " 'index': 348,\n", " 'word': 
'haze',\n", " 'start': 1586,\n", " 'end': 1590},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996555,\n", " 'index': 349,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9827891,\n", " 'index': 350,\n", " 'word': 'well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96698594,\n", " 'index': 351,\n", " 'word': 'no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9919774,\n", " 'index': 352,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9584662,\n", " 'index': 353,\n", " 'word': 'yes',\n", " 'start': 1601,\n", " 'end': 1604},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99590755,\n", " 'index': 354,\n", " 'word': 'that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9936069,\n", " 'index': 355,\n", " 'word': 'rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.97964877,\n", " 'index': 356,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.96804506,\n", " 'index': 357,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9974667,\n", " 'index': 358,\n", " 'word': 'but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99407756,\n", " 'index': 359,\n", " 'word': 'to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99163276,\n", " 'index': 360,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9937564,\n", " 'index': 361,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9836573,\n", " 'index': 362,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'LABEL_0',\n", " 
'score': 0.98289907,\n", " 'index': 363,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.99786407,\n", " 'index': 364,\n", " 'word': 'april',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.99625564,\n", " 'index': 365,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.65599644,\n", " 'index': 366,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'LABEL_24',\n", " 'score': 0.9716897,\n", " 'index': 367,\n", " 'word': '2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993957,\n", " 'index': 368,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994825,\n", " 'index': 369,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994381,\n", " 'index': 370,\n", " 'word': 'to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996543,\n", " 'index': 371,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99964786,\n", " 'index': 372,\n", " 'word': 'another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.999567,\n", " 'index': 373,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99953973,\n", " 'index': 374,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99928975,\n", " 'index': 375,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992249,\n", " 'index': 376,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.993779,\n", " 'index': 377,\n", " 'word': 'it',\n", " 'start': 
1714,\n", " 'end': 1716},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9736397,\n", " 'index': 378,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7797392,\n", " 'index': 379,\n", " 'word': 'a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.29156896,\n", " 'index': 380,\n", " 'word': 'cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.40196428,\n", " 'index': 381,\n", " 'word': '##less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'LABEL_10',\n", " 'score': 0.2967023,\n", " 'index': 382,\n", " 'word': 'summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5588353,\n", " 'index': 383,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99896514,\n", " 'index': 384,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.995736,\n", " 'index': 385,\n", " 'word': 'mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.977207,\n", " 'index': 386,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99926835,\n", " 'index': 387,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99910814,\n", " 'index': 388,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99894994,\n", " 'index': 389,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99952054,\n", " 'index': 390,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99940646,\n", " 'index': 391,\n", " 'word': 'an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.9992894,\n", " 'index': 392,\n", " 'word': 'amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99922264,\n", " 'index': 393,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99742824,\n", " 'index': 394,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99749315,\n", " 'index': 395,\n", " 'word': 'the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9970276,\n", " 'index': 396,\n", " 'word': 'camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.993164,\n", " 'index': 397,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9909326,\n", " 'index': 398,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9945222,\n", " 'index': 399,\n", " 'word': 'absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99423146,\n", " 'index': 400,\n", " 'word': 'maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.98917,\n", " 'index': 401,\n", " 'word': 'revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.8007525,\n", " 'index': 402,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9982333,\n", " 'index': 403,\n", " 'word': 'with',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99956816,\n", " 'index': 404,\n", " 'word': 'this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997185,\n", " 'index': 405,\n", " 'word': 'camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99758506,\n", " 'index': 406,\n", " 'word': 'you',\n", " 
'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9970925,\n", " 'index': 407,\n", " 'word': 'can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99829715,\n", " 'index': 408,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9970024,\n", " 'index': 409,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99906355,\n", " 'index': 410,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99950504,\n", " 'index': 411,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99918514,\n", " 'index': 412,\n", " 'word': 'a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99943334,\n", " 'index': 413,\n", " 'word': 'digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996412,\n", " 'index': 414,\n", " 'word': 'image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993812,\n", " 'index': 415,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99675,\n", " 'index': 416,\n", " 'word': '3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99862754,\n", " 'index': 417,\n", " 'word': 'times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99918944,\n", " 'index': 418,\n", " 'word': 'bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990243,\n", " 'index': 419,\n", " 'word': 'than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996014,\n", " 'index': 420,\n", " 'word': 'the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'LABEL_0',\n", " 'score': 
0.9998192,\n", " 'index': 421,\n", " 'word': 'pixel',\n", " 'start': 1916,\n", " 'end': 1921},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99965644,\n", " 'index': 422,\n", " 'word': 'size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998871,\n", " 'index': 423,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996221,\n", " 'index': 424,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9996413,\n", " 'index': 425,\n", " 'word': 'if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9990324,\n", " 'index': 426,\n", " 'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9986166,\n", " 'index': 427,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99817145,\n", " 'index': 428,\n", " 'word': 'any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99731725,\n", " 'index': 429,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99823487,\n", " 'index': 430,\n", " 'word': 'of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99668664,\n", " 'index': 431,\n", " 'word': 'life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9992847,\n", " 'index': 432,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.995812,\n", " 'index': 433,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99794585,\n", " 'index': 434,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9973961,\n", " 'index': 435,\n", " 'word': 'easily',\n", " 'start': 1982,\n", 
" 'end': 1988},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9948614,\n", " 'index': 436,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99854785,\n", " 'index': 437,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99702233,\n", " 'index': 438,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9966197,\n", " 'index': 439,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99884593,\n", " 'index': 440,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9997309,\n", " 'index': 441,\n", " 'word': 'what',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9998198,\n", " 'index': 442,\n", " 'word': 'the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99978286,\n", " 'index': 443,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9994942,\n", " 'index': 444,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9993857,\n", " 'index': 445,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.64063627,\n", " 'index': 446,\n", " 'word': 'the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'LABEL_10',\n", " 'score': 0.68473804,\n", " 'index': 447,\n", " 'word': 'butte',\n", " 'start': 2041,\n", " 'end': 2046},\n", " {'entity': 'LABEL_10',\n", " 'score': 0.7311275,\n", " 'index': 448,\n", " 'word': 'or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.47081882,\n", " 'index': 449,\n", " 'word': 'mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.9972504,\n", " 
'index': 450,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99924064,\n", " 'index': 451,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99846965,\n", " 'index': 452,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99877447,\n", " 'index': 453,\n", " 'word': 'land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99923384,\n", " 'index': 454,\n", " 'word': '##forms',\n", " 'start': 2070,\n", " 'end': 2075},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99003917,\n", " 'index': 455,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.907271,\n", " 'index': 456,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.33334938,\n", " 'index': 457,\n", " 'word': 'the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'LABEL_10',\n", " 'score': 0.66397417,\n", " 'index': 458,\n", " 'word': 'american',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'LABEL_11',\n", " 'score': 0.86412615,\n", " 'index': 459,\n", " 'word': 'west',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.99960643,\n", " 'index': 460,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Token-classification pass over `text` with the brettlin DistilBERT checkpoint.\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Checkpoint id named once so the tokenizer and the model weights cannot get out of sync.\n", "checkpoint = \"brettlin/distilbert-base-uncased-finetuned-ner\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "model = AutoModelForTokenClassification.from_pretrained(checkpoint)\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# One dict per sub-word token with keys entity, score, index, word, start, end.\n", "ner_results = nlp(text)\n", "ner_results" ]
}, { "cell_type": "code", "execution_count": 105, "id": "888f3316-8453-418b-be9a-266787210929", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "LABEL_0 423\n", "LABEL_10 4\n", "LABEL_11 1\n", "LABEL_14 9\n", "LABEL_20 3\n", "LABEL_24 7\n", "LABEL_5 1\n", "LABEL_7 2\n", "LABEL_73 2\n", "LABEL_74 4\n", "LABEL_75 4\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "LABEL_0 9\n", " ##ct 1\n", " ##ern 1\n", " ##form 2\n", " ##forms 1\n", " ##less 1\n", " ##n 1\n", " ##r 1\n", " ##rra 1\n", " , 33\n", " - 2\n", " . 12\n", " 25 1\n", " 3 1\n", " ; 1\n", " ? 1\n", " a 10\n", " able 1\n", " about 1\n", " absolute 1\n", " ago 1\n", " alien 2\n", " aliens 1\n", " all 3\n", " amazing 1\n", " an 2\n", " ancient 1\n", " and 8\n", " announced 1\n", " another 3\n", " any 1\n", " appeared 1\n", " are 1\n", " around 2\n", " at 2\n", " attention 1\n", " be 2\n", " became 1\n", " because 1\n", " bigger 1\n", " books 1\n", " but 4\n", " by 3\n", " camera 3\n", " can 1\n", " captured 1\n", " circling 1\n", " civilization 1\n", " clear 1\n", " cloud 1\n", " common 2\n", " correct 1\n", " could 2\n", " created 1\n", " day 1\n", " days 1\n", " decided 2\n", " defenders 1\n", " did 1\n", " digital 1\n", " disc 1\n", " easily 1\n", " engage 1\n", " evidence 2\n", " face 5\n", " few 1\n", " figured 1\n", " findings 1\n", " five 1\n", " for 2\n", " formation 1\n", " formed 1\n", " good 1\n", " had 1\n", " haze 1\n", " head 1\n", " hidden 1\n", " hide 1\n", " his 1\n", " huge 1\n", " human 2\n", " icon 1\n", " if 2\n", " image 2\n", " in 3\n", " is 2\n", " it 10\n", " just 3\n", " land 3\n", " later 1\n", " life 3\n", " like 1\n", " likeness 1\n", " look 1\n", " made 2\n", " magazines 1\n", " make 1\n", " making 1\n", " mali 1\n", " markings 1\n", " mars 6\n", " martian 1\n", " maximum 1\n", " me 1\n", " mean 1\n", " means 1\n", " meant 1\n", " mesa 2\n", " monument 1\n", " movies 1\n", " nasa 2\n", " natural 2\n", " no 
3\n", " note 1\n", " obviously 1\n", " of 5\n", " on 7\n", " one 1\n", " only 2\n", " original 1\n", " our 1\n", " out 1\n", " people 1\n", " photo 1\n", " photos 2\n", " picture 4\n", " pixel 1\n", " planet 1\n", " pop 1\n", " prove 1\n", " public 1\n", " re 1\n", " really 1\n", " resembled 1\n", " revealed 1\n", " revealing 1\n", " revolution 1\n", " rock 1\n", " rumor 1\n", " s 3\n", " scientists 2\n", " see 2\n", " shadows 2\n", " shadowy 1\n", " sharpe 1\n", " shot 2\n", " should 1\n", " showed 1\n", " signs 1\n", " size 1\n", " snapping 1\n", " so 1\n", " some 1\n", " soon 1\n", " spacecraft 1\n", " spotted 1\n", " started 1\n", " story 1\n", " sure 3\n", " t 2\n", " take 2\n", " team 2\n", " tell 1\n", " ten 1\n", " than 2\n", " that 10\n", " the 17\n", " them 1\n", " there 3\n", " they 1\n", " things 1\n", " this 2\n", " thought 2\n", " times 2\n", " to 10\n", " took 1\n", " twenty 1\n", " us 2\n", " using 1\n", " very 2\n", " viking 2\n", " wanted 1\n", " was 10\n", " wasn 1\n", " way 1\n", " we 7\n", " well 1\n", " were 3\n", " weren 1\n", " what 2\n", " when 1\n", " which 5\n", " whole 1\n", " wish 1\n", " with 2\n", " would 1\n", " wrong 2\n", " years 2\n", " yes 1\n", " you 4\n", "LABEL_10 american 1\n", " butte 1\n", " or 1\n", " summer 1\n", "LABEL_11 west 1\n", "LABEL_14 ##out 1\n", " 1 1\n", " budget 1\n", " check 1\n", " grocery 1\n", " haunted 1\n", " mars 1\n", " nasa 1\n", " store 1\n", "LABEL_20 ##don 1\n", " ##ia 1\n", " cy 1\n", "LABEL_24 , 1\n", " 1998 1\n", " 2001 1\n", " 5 1\n", " 8 1\n", " april 2\n", "LABEL_5 michael 1\n", "LABEL_7 ##n 1\n", " mali 1\n", "LABEL_73 egypt 1\n", " radio 1\n", "LABEL_74 ##er 1\n", " ##ion 1\n", " orbit 1\n", " talk 1\n", "LABEL_75 lines 1\n", " pharaoh 1\n", " scientist 1\n", " shows 1\n", "dtype: int64" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reload the token-level results stored for this checkpoint and flatten them into a DataFrame.\n", "with open(\"48 brettlindistilbert-base-uncased-finetuned-ner.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "aux = obtener_dataframe(data)\n",
"\n", "# Tokens per predicted label (printed), then tokens per (label, word) pair as the cell result.\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()\n", "\n", "\n", "# Author's reference table: tag name followed by its index (B-/I-/L-/U- prefixes plus O).\n", "# Presumably index n is the tag behind LABEL_n above -- TODO confirm against the model config.\n", "# ['O', \t0\n", "# 'B-O', \t1\n", "# 'I-O', \t2\n", "# 'L-O', \t3\n", "# 'U-O', \t4\n", "# 'B-PER', \t5\n", "# 'I-PER', \t6\n", "# 'L-PER', \t7\n", "# 'U-PER', \t8\n", "# 'B-LOC', \t9\n", "# 'I-LOC', \t10\n", "# 'L-LOC', \t11\n", "# 'U-LOC', \t12\n", "# 'B-ORG', \t13\n", "# 'I-ORG', \t14\n", "# 'L-ORG', \t15\n", "# 'U-ORG', \t16\n", "# 'B-NRP', \t17\n", "# 'I-NRP', \t18\n", "# 'L-NRP', \t19\n", "# 'U-NRP', \t20\n", "# 'B-DATE_TIME', \t21\n", "# 'I-DATE_TIME', \t22\n", "# 'L-DATE_TIME', \t23\n", "# 'U-DATE_TIME', \t24\n", "# 'B-CREDIT_CARD', \t25\n", "# 'I-CREDIT_CARD', \t26\n", "# 'L-CREDIT_CARD', \t27\n", "# 'U-CREDIT_CARD', \t28\n", "# 'B-URL', \t29\n", "# 'I-URL', \t30\n", "# 'L-URL', \t31\n", "# 'U-URL', \t32\n", "# 'B-IBAN_CODE', \t33\n", "# 'I-IBAN_CODE', \t34\n", "# 'L-IBAN_CODE', \t35\n", "# 'U-IBAN_CODE', \t36\n", "# 'B-US_BANK_NUMBER', \t37\n", "# 'I-US_BANK_NUMBER', \t38\n", "# 'L-US_BANK_NUMBER', \t39\n", "# 'U-US_BANK_NUMBER', \t40\n", "# 'B-PHONE_NUMBER', \t41\n", "# 'I-PHONE_NUMBER', \t42\n", "# 'L-PHONE_NUMBER', \t43\n", "# 'U-PHONE_NUMBER', \t44\n", "# 'B-US_SSN', \t45\n", "# 'I-US_SSN', \t46\n", "# 'L-US_SSN', \t47\n", "# 'U-US_SSN', \t48\n", "# 'B-US_PASSPORT', \t49\n", "# 'I-US_PASSPORT', \t50\n", "# 'L-US_PASSPORT', \t51\n", "# 'U-US_PASSPORT', \t52\n", "# 'B-US_DRIVER_LICENSE', \t53\n", "# 'I-US_DRIVER_LICENSE', \t54\n", "# 'L-US_DRIVER_LICENSE', \t55\n", "# 'U-US_DRIVER_LICENSE', \t56\n", "# 'B-US_LICENSE_PLATE', \t57\n", "# 'I-US_LICENSE_PLATE', \t58\n", "# 'L-US_LICENSE_PLATE', \t59\n", "# 'U-US_LICENSE_PLATE', \t60\n", "# 'B-IP_ADDRESS', \t61\n", "# 'I-IP_ADDRESS', \t62\n", "# 'L-IP_ADDRESS', \t63\n", "# 'U-IP_ADDRESS', \t64\n", "# 'B-US_ITIN', \t65\n", "# 'I-US_ITIN', \t66\n", "# 'L-US_ITIN', \t67\n", "# 'U-US_ITIN', \t68\n", "# 'B-EMAIL_ADDRESS', \t69\n", "# 'I-EMAIL_ADDRESS', 
\t70\n", "# 'L-EMAIL_ADDRESS', \t71\n", "# 'U-EMAIL_ADDRESS', \t72\n", "# 'B-TITLE', \t73\n", "# 'I-TITLE', \t74\n", "# 'L-TITLE', \t75\n", "# 'U-TITLE', \t76\n", "# 'B-COORDINATE', \t77\n", "# 'I-COORDINATE', \t78\n", "# 'L-COORDINATE', \t79\n", "# 'U-COORDINATE', \t80\n", "# 'B-IMEI', \t81\n", "# 'I-IMEI', \t82\n", "# 'L-IMEI', \t83\n", "# 'U-IMEI', \t84\n", "# 'B-PASSWORD', \t85\n", "# 'I-PASSWORD', \t86\n", "# 'L-PASSWORD', \t87\n", "# 'U-PASSWORD', \t88\n", "# 'B-LICENSE_PLATE', \t89\n", "# 'I-LICENSE_PLATE', \t90\n", "# 'L-LICENSE_PLATE', \t91\n", "# 'U-LICENSE_PLATE', \t92\n", "# 'B-CURRENCY', \t93\n", "# 'I-CURRENCY', \t94\n", "# 'L-CURRENCY', \t95\n", "# 'U-CURRENCY', \t96\n", "# 'B-FINANCIAL', \t97\n", "# 'I-FINANCIAL', \t98\n", "# 'L-FINANCIAL', \t99\n", "# 'U-FINANCIAL', \t100\n", "# 'B-ROUTING_NUMBER', \t101\n", "# 'I-ROUTING_NUMBER', \t102\n", "# 'L-ROUTING_NUMBER', \t103\n", "# 'U-ROUTING_NUMBER', \t104\n", "# 'B-SWIFT_CODE', \t105\n", "# 'I-SWIFT_CODE', \t106\n", "# 'L-SWIFT_CODE', \t107\n", "# 'U-SWIFT_CODE', \t108\n", "# 'B-MAC_ADDRESS', \t109\n", "# 'I-MAC_ADDRESS', \t110\n", "# 'L-MAC_ADDRESS', \t111\n", "# 'U-MAC_ADDRESS', \t112\n", "# 'B-AGE', \t113\n", "# 'I-AGE', \t114\n", "# 'L-AGE', \t115\n", "# 'U-AGE']\t116\n", "\n" ] }, { "cell_type": "markdown", "id": "665c8f4a-5e6d-4c7a-9b4e-263d5181d0eb", "metadata": {}, "source": [ "## 49 papluca/xlm-roberta-base-language-detection" ] }, { "cell_type": "code", "execution_count": 117, "id": "9e192dd7-a08d-4f3f-a4ea-eb22adb0c755", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at papluca/xlm-roberta-base-language-detection and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'en',\n", " 
'score': 0.11591221,\n", " 'index': 1,\n", " 'word': '▁So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'en',\n", " 'score': 0.124623634,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'el',\n", " 'score': 0.12547147,\n", " 'index': 3,\n", " 'word': '▁if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'en',\n", " 'score': 0.13911082,\n", " 'index': 4,\n", " 'word': '▁you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'en',\n", " 'score': 0.17602253,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'el',\n", " 'score': 0.11501573,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'en',\n", " 'score': 0.14821951,\n", " 'index': 7,\n", " 'word': '▁a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'el',\n", " 'score': 0.10118,\n", " 'index': 8,\n", " 'word': '▁NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'el',\n", " 'score': 0.09721276,\n", " 'index': 9,\n", " 'word': '▁scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'el',\n", " 'score': 0.12020393,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'el',\n", " 'score': 0.13406067,\n", " 'index': 11,\n", " 'word': '▁you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'el',\n", " 'score': 0.11300563,\n", " 'index': 12,\n", " 'word': '▁should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'en',\n", " 'score': 0.14275809,\n", " 'index': 13,\n", " 'word': '▁be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'el',\n", " 'score': 0.1305934,\n", " 'index': 14,\n", " 'word': '▁able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'en',\n", " 'score': 0.16247976,\n", " 'index': 15,\n", " 'word': '▁to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'el',\n", " 'score': 0.12143467,\n", " 'index': 16,\n", " 'word': '▁tell',\n", " 'start': 54,\n", " 'end': 
58},\n", " {'entity': 'el',\n", " 'score': 0.109906994,\n", " 'index': 17,\n", " 'word': '▁me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'en',\n", " 'score': 0.1670305,\n", " 'index': 18,\n", " 'word': '▁the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'en',\n", " 'score': 0.13686182,\n", " 'index': 19,\n", " 'word': '▁whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'el',\n", " 'score': 0.12123093,\n", " 'index': 20,\n", " 'word': '▁story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'en',\n", " 'score': 0.15466973,\n", " 'index': 21,\n", " 'word': '▁about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'en',\n", " 'score': 0.14879441,\n", " 'index': 22,\n", " 'word': '▁the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'en',\n", " 'score': 0.106891826,\n", " 'index': 23,\n", " 'word': '▁Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'el',\n", " 'score': 0.11462047,\n", " 'index': 24,\n", " 'word': '▁On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'en',\n", " 'score': 0.09874061,\n", " 'index': 25,\n", " 'word': '▁Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'en',\n", " 'score': 0.14187884,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'en',\n", " 'score': 0.1268902,\n", " 'index': 27,\n", " 'word': '▁which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'el',\n", " 'score': 0.09524864,\n", " 'index': 28,\n", " 'word': '▁obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': 'el',\n", " 'score': 0.12556419,\n", " 'index': 29,\n", " 'word': '▁is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'el',\n", " 'score': 0.13329445,\n", " 'index': 30,\n", " 'word': '▁evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'en',\n", " 'score': 0.15930562,\n", " 'index': 31,\n", " 'word': '▁that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'el',\n", " 'score': 
0.13351129,\n", " 'index': 32,\n", " 'word': '▁there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'en',\n", " 'score': 0.12728569,\n", " 'index': 33,\n", " 'word': '▁is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'el',\n", " 'score': 0.11237473,\n", " 'index': 34,\n", " 'word': '▁life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'el',\n", " 'score': 0.14296363,\n", " 'index': 35,\n", " 'word': '▁on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'en',\n", " 'score': 0.09912568,\n", " 'index': 36,\n", " 'word': '▁Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'en',\n", " 'score': 0.14278257,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'en',\n", " 'score': 0.1724549,\n", " 'index': 38,\n", " 'word': '▁and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'en',\n", " 'score': 0.14657633,\n", " 'index': 39,\n", " 'word': '▁that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'en',\n", " 'score': 0.17009127,\n", " 'index': 40,\n", " 'word': '▁the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'en',\n", " 'score': 0.12712897,\n", " 'index': 41,\n", " 'word': '▁face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'en',\n", " 'score': 0.14602062,\n", " 'index': 42,\n", " 'word': '▁was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'el',\n", " 'score': 0.12748437,\n", " 'index': 43,\n", " 'word': '▁created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'en',\n", " 'score': 0.14597844,\n", " 'index': 44,\n", " 'word': '▁by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'el',\n", " 'score': 0.094343364,\n", " 'index': 45,\n", " 'word': '▁alien',\n", " 'start': 191,\n", " 'end': 196},\n", " {'entity': 'en',\n", " 'score': 0.137794,\n", " 'index': 46,\n", " 'word': 's',\n", " 'start': 196,\n", " 'end': 197},\n", " {'entity': 'el',\n", " 'score': 0.119442455,\n", " 'index': 47,\n", " 'word': 
',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'el',\n", " 'score': 0.08792857,\n", " 'index': 48,\n", " 'word': '▁correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'el',\n", " 'score': 0.08977283,\n", " 'index': 49,\n", " 'word': '?\"',\n", " 'start': 206,\n", " 'end': 208},\n", " {'entity': 'el',\n", " 'score': 0.08942173,\n", " 'index': 50,\n", " 'word': '▁No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'el',\n", " 'score': 0.11885595,\n", " 'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'en',\n", " 'score': 0.128854,\n", " 'index': 52,\n", " 'word': '▁twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'en',\n", " 'score': 0.14528814,\n", " 'index': 53,\n", " 'word': '▁five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'en',\n", " 'score': 0.14489171,\n", " 'index': 54,\n", " 'word': '▁years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 'en',\n", " 'score': 0.1306482,\n", " 'index': 55,\n", " 'word': '▁ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'en',\n", " 'score': 0.13290027,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'el',\n", " 'score': 0.10796524,\n", " 'index': 57,\n", " 'word': '▁our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'el',\n", " 'score': 0.08923595,\n", " 'index': 58,\n", " 'word': '▁Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'el',\n", " 'score': 0.08884452,\n", " 'index': 59,\n", " 'word': '▁1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'el',\n", " 'score': 0.11797657,\n", " 'index': 60,\n", " 'word': '▁space',\n", " 'start': 249,\n", " 'end': 254},\n", " {'entity': 'el',\n", " 'score': 0.10235624,\n", " 'index': 61,\n", " 'word': 'craft',\n", " 'start': 254,\n", " 'end': 259},\n", " {'entity': 'el',\n", " 'score': 0.12116124,\n", " 'index': 62,\n", " 'word': '▁was',\n", " 'start': 260,\n", " 'end': 
263},\n", " {'entity': 'en',\n", " 'score': 0.13153702,\n", " 'index': 63,\n", " 'word': '▁circ',\n", " 'start': 264,\n", " 'end': 268},\n", " {'entity': 'en',\n", " 'score': 0.15348472,\n", " 'index': 64,\n", " 'word': 'ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'en',\n", " 'score': 0.16437925,\n", " 'index': 65,\n", " 'word': '▁the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'el',\n", " 'score': 0.09529811,\n", " 'index': 66,\n", " 'word': '▁planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'el',\n", " 'score': 0.11570662,\n", " 'index': 67,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'en',\n", " 'score': 0.10650027,\n", " 'index': 68,\n", " 'word': '▁sna',\n", " 'start': 285,\n", " 'end': 288},\n", " {'entity': 'en',\n", " 'score': 0.16732152,\n", " 'index': 69,\n", " 'word': 'pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'el',\n", " 'score': 0.10069636,\n", " 'index': 70,\n", " 'word': '▁photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'en',\n", " 'score': 0.12804745,\n", " 'index': 71,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'el',\n", " 'score': 0.12151808,\n", " 'index': 72,\n", " 'word': '▁when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'en',\n", " 'score': 0.14805369,\n", " 'index': 73,\n", " 'word': '▁it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'en',\n", " 'score': 0.13002725,\n", " 'index': 74,\n", " 'word': '▁spot',\n", " 'start': 310,\n", " 'end': 314},\n", " {'entity': 'en',\n", " 'score': 0.14732121,\n", " 'index': 75,\n", " 'word': 'ted',\n", " 'start': 314,\n", " 'end': 317},\n", " {'entity': 'en',\n", " 'score': 0.16823284,\n", " 'index': 76,\n", " 'word': '▁the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'el',\n", " 'score': 0.10869224,\n", " 'index': 77,\n", " 'word': '▁shadow',\n", " 'start': 322,\n", " 'end': 328},\n", " {'entity': 'en',\n", " 'score': 
0.1593602,\n", " 'index': 78,\n", " 'word': 'y',\n", " 'start': 328,\n", " 'end': 329},\n", " {'entity': 'el',\n", " 'score': 0.1302847,\n", " 'index': 79,\n", " 'word': '▁like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'en',\n", " 'score': 0.13379778,\n", " 'index': 80,\n", " 'word': 'ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'en',\n", " 'score': 0.15503198,\n", " 'index': 81,\n", " 'word': '▁of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'en',\n", " 'score': 0.15058768,\n", " 'index': 82,\n", " 'word': '▁a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'el',\n", " 'score': 0.110621594,\n", " 'index': 83,\n", " 'word': '▁human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'en',\n", " 'score': 0.13605565,\n", " 'index': 84,\n", " 'word': '▁face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'en',\n", " 'score': 0.15529045,\n", " 'index': 85,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 'el',\n", " 'score': 0.100054726,\n", " 'index': 86,\n", " 'word': '▁Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'el',\n", " 'score': 0.1203518,\n", " 'index': 87,\n", " 'word': '▁scientist',\n", " 'start': 359,\n", " 'end': 368},\n", " {'entity': 'en',\n", " 'score': 0.15678018,\n", " 'index': 88,\n", " 'word': 's',\n", " 'start': 368,\n", " 'end': 369},\n", " {'entity': 'el',\n", " 'score': 0.14597937,\n", " 'index': 89,\n", " 'word': '▁figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'en',\n", " 'score': 0.14744708,\n", " 'index': 90,\n", " 'word': 'd',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'el',\n", " 'score': 0.13703896,\n", " 'index': 91,\n", " 'word': '▁out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'el',\n", " 'score': 0.13980067,\n", " 'index': 92,\n", " 'word': '▁that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'en',\n", " 'score': 0.14452939,\n", " 'index': 93,\n", " 'word': 
'▁it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'el',\n", " 'score': 0.13332519,\n", " 'index': 94,\n", " 'word': '▁was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'el',\n", " 'score': 0.11484533,\n", " 'index': 95,\n", " 'word': '▁just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'ar',\n", " 'score': 0.08993049,\n", " 'index': 96,\n", " 'word': '▁another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'ar',\n", " 'score': 0.08672481,\n", " 'index': 97,\n", " 'word': '▁Marti',\n", " 'start': 407,\n", " 'end': 412},\n", " {'entity': 'el',\n", " 'score': 0.122489624,\n", " 'index': 98,\n", " 'word': 'an',\n", " 'start': 412,\n", " 'end': 414},\n", " {'entity': 'el',\n", " 'score': 0.08492366,\n", " 'index': 99,\n", " 'word': '▁mesa',\n", " 'start': 415,\n", " 'end': 419},\n", " {'entity': 'el',\n", " 'score': 0.11751986,\n", " 'index': 100,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'el',\n", " 'score': 0.097591884,\n", " 'index': 101,\n", " 'word': '▁common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'el',\n", " 'score': 0.106916085,\n", " 'index': 102,\n", " 'word': '▁around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'en',\n", " 'score': 0.120345294,\n", " 'index': 103,\n", " 'word': '▁Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'en',\n", " 'score': 0.12486706,\n", " 'index': 104,\n", " 'word': 'do',\n", " 'start': 437,\n", " 'end': 439},\n", " {'entity': 'el',\n", " 'score': 0.11398356,\n", " 'index': 105,\n", " 'word': 'nia',\n", " 'start': 439,\n", " 'end': 442},\n", " {'entity': 'en',\n", " 'score': 0.12314375,\n", " 'index': 106,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'el',\n", " 'score': 0.09227569,\n", " 'index': 107,\n", " 'word': '▁only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'el',\n", " 'score': 0.11441822,\n", " 'index': 108,\n", " 'word': '▁this',\n", " 'start': 449,\n", " 
'end': 453},\n", " {'entity': 'el',\n", " 'score': 0.13375378,\n", " 'index': 109,\n", " 'word': '▁one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'el',\n", " 'score': 0.1260068,\n", " 'index': 110,\n", " 'word': '▁had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'el',\n", " 'score': 0.0983968,\n", " 'index': 111,\n", " 'word': '▁shadow',\n", " 'start': 462,\n", " 'end': 468},\n", " {'entity': 'en',\n", " 'score': 0.14185685,\n", " 'index': 112,\n", " 'word': 's',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'en',\n", " 'score': 0.14497927,\n", " 'index': 113,\n", " 'word': '▁that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'el',\n", " 'score': 0.13247032,\n", " 'index': 114,\n", " 'word': '▁made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'el',\n", " 'score': 0.13548398,\n", " 'index': 115,\n", " 'word': '▁it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'el',\n", " 'score': 0.13423643,\n", " 'index': 116,\n", " 'word': '▁look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'el',\n", " 'score': 0.1423772,\n", " 'index': 117,\n", " 'word': '▁like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'el',\n", " 'score': 0.14577042,\n", " 'index': 118,\n", " 'word': '▁an',\n", " 'start': 493,\n", " 'end': 495},\n", " {'entity': 'ar',\n", " 'score': 0.081988394,\n", " 'index': 119,\n", " 'word': '▁Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'el',\n", " 'score': 0.080225624,\n", " 'index': 120,\n", " 'word': 'ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'el',\n", " 'score': 0.09277659,\n", " 'index': 121,\n", " 'word': '▁Phar',\n", " 'start': 505,\n", " 'end': 509},\n", " {'entity': 'en',\n", " 'score': 0.17454442,\n", " 'index': 122,\n", " 'word': 'a',\n", " 'start': 509,\n", " 'end': 510},\n", " {'entity': 'en',\n", " 'score': 0.10499313,\n", " 'index': 123,\n", " 'word': 'oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 
'en',\n", " 'score': 0.1622584,\n", " 'index': 124,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'en',\n", " 'score': 0.14269882,\n", " 'index': 125,\n", " 'word': '▁Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'el',\n", " 'score': 0.1382166,\n", " 'index': 126,\n", " 'word': '▁few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'el',\n", " 'score': 0.13428125,\n", " 'index': 127,\n", " 'word': '▁days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'el',\n", " 'score': 0.13032675,\n", " 'index': 128,\n", " 'word': '▁later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'el',\n", " 'score': 0.12615447,\n", " 'index': 129,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'el',\n", " 'score': 0.13707836,\n", " 'index': 130,\n", " 'word': '▁we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'el',\n", " 'score': 0.11976686,\n", " 'index': 131,\n", " 'word': '▁reveal',\n", " 'start': 538,\n", " 'end': 544},\n", " {'entity': 'en',\n", " 'score': 0.1461132,\n", " 'index': 132,\n", " 'word': 'ed',\n", " 'start': 544,\n", " 'end': 546},\n", " {'entity': 'en',\n", " 'score': 0.15168166,\n", " 'index': 133,\n", " 'word': '▁the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'el',\n", " 'score': 0.10999579,\n", " 'index': 134,\n", " 'word': '▁image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'el',\n", " 'score': 0.13296223,\n", " 'index': 135,\n", " 'word': '▁for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'el',\n", " 'score': 0.12075652,\n", " 'index': 136,\n", " 'word': '▁all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'en',\n", " 'score': 0.14416823,\n", " 'index': 137,\n", " 'word': '▁to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'el',\n", " 'score': 0.12048786,\n", " 'index': 138,\n", " 'word': '▁see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'en',\n", " 'score': 0.14799197,\n", " 
'index': 139,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'en',\n", " 'score': 0.1485873,\n", " 'index': 140,\n", " 'word': '▁and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'el',\n", " 'score': 0.14215225,\n", " 'index': 141,\n", " 'word': '▁we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'el',\n", " 'score': 0.13101134,\n", " 'index': 142,\n", " 'word': '▁made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'el',\n", " 'score': 0.14018072,\n", " 'index': 143,\n", " 'word': '▁sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'en',\n", " 'score': 0.15333681,\n", " 'index': 144,\n", " 'word': '▁to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'el',\n", " 'score': 0.11945983,\n", " 'index': 145,\n", " 'word': '▁note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'el',\n", " 'score': 0.13811947,\n", " 'index': 146,\n", " 'word': '▁that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'en',\n", " 'score': 0.14908531,\n", " 'index': 147,\n", " 'word': '▁it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'el',\n", " 'score': 0.13367933,\n", " 'index': 148,\n", " 'word': '▁was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'en',\n", " 'score': 0.1512028,\n", " 'index': 149,\n", " 'word': '▁a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'el',\n", " 'score': 0.12587915,\n", " 'index': 150,\n", " 'word': '▁huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'el',\n", " 'score': 0.0964705,\n", " 'index': 151,\n", " 'word': '▁rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'el',\n", " 'score': 0.11105906,\n", " 'index': 152,\n", " 'word': '▁formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'en',\n", " 'score': 0.14971292,\n", " 'index': 153,\n", " 'word': '▁that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'el',\n", " 'score': 0.14679578,\n", " 'index': 154,\n", " 'word': 
'▁just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'en',\n", " 'score': 0.15699105,\n", " 'index': 155,\n", " 'word': '▁rese',\n", " 'start': 642,\n", " 'end': 646},\n", " {'entity': 'en',\n", " 'score': 0.15588175,\n", " 'index': 156,\n", " 'word': 'mble',\n", " 'start': 646,\n", " 'end': 650},\n", " {'entity': 'en',\n", " 'score': 0.15559243,\n", " 'index': 157,\n", " 'word': 'd',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'el',\n", " 'score': 0.14160416,\n", " 'index': 158,\n", " 'word': '▁a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'el',\n", " 'score': 0.11545096,\n", " 'index': 159,\n", " 'word': '▁human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'el',\n", " 'score': 0.09418173,\n", " 'index': 160,\n", " 'word': '▁head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'en',\n", " 'score': 0.15333837,\n", " 'index': 161,\n", " 'word': '▁and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'en',\n", " 'score': 0.1189025,\n", " 'index': 162,\n", " 'word': '▁face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'en',\n", " 'score': 0.15204029,\n", " 'index': 163,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'el',\n", " 'score': 0.1288495,\n", " 'index': 164,\n", " 'word': '▁but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'el',\n", " 'score': 0.12995465,\n", " 'index': 165,\n", " 'word': '▁all',\n", " 'start': 679,\n", " 'end': 682},\n", " {'entity': 'en',\n", " 'score': 0.15353577,\n", " 'index': 166,\n", " 'word': '▁of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'en',\n", " 'score': 0.152363,\n", " 'index': 167,\n", " 'word': '▁it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'en',\n", " 'score': 0.1399179,\n", " 'index': 168,\n", " 'word': '▁was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'en',\n", " 'score': 0.1595559,\n", " 'index': 169,\n", " 'word': '▁for',\n", " 'start': 693,\n", " 'end': 
696},\n", " {'entity': 'en',\n", " 'score': 0.15899158,\n", " 'index': 170,\n", " 'word': 'med',\n", " 'start': 696,\n", " 'end': 699},\n", " {'entity': 'el',\n", " 'score': 0.121669054,\n", " 'index': 171,\n", " 'word': '▁by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'en',\n", " 'score': 0.106091,\n", " 'index': 172,\n", " 'word': '▁shadow',\n", " 'start': 703,\n", " 'end': 709},\n", " {'entity': 'en',\n", " 'score': 0.14808753,\n", " 'index': 173,\n", " 'word': 's',\n", " 'start': 709,\n", " 'end': 710},\n", " {'entity': 'en',\n", " 'score': 0.16585921,\n", " 'index': 174,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'en',\n", " 'score': 0.1352026,\n", " 'index': 175,\n", " 'word': '▁We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'el',\n", " 'score': 0.11284589,\n", " 'index': 176,\n", " 'word': '▁only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'en',\n", " 'score': 0.09965902,\n", " 'index': 177,\n", " 'word': '▁announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'en',\n", " 'score': 0.15637,\n", " 'index': 178,\n", " 'word': '▁it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'el',\n", " 'score': 0.12986624,\n", " 'index': 179,\n", " 'word': '▁because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'el',\n", " 'score': 0.14423794,\n", " 'index': 180,\n", " 'word': '▁we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'el',\n", " 'score': 0.1403598,\n", " 'index': 181,\n", " 'word': '▁thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'en',\n", " 'score': 0.1668672,\n", " 'index': 182,\n", " 'word': '▁it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'el',\n", " 'score': 0.13016574,\n", " 'index': 183,\n", " 'word': '▁would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'el',\n", " 'score': 0.14227276,\n", " 'index': 184,\n", " 'word': '▁be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'el',\n", " 
'score': 0.14242609,\n", " 'index': 185,\n", " 'word': '▁a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'el',\n", " 'score': 0.14193374,\n", " 'index': 186,\n", " 'word': '▁good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'el',\n", " 'score': 0.13710198,\n", " 'index': 187,\n", " 'word': '▁way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'el',\n", " 'score': 0.14386745,\n", " 'index': 188,\n", " 'word': '▁to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'el',\n", " 'score': 0.11916898,\n", " 'index': 189,\n", " 'word': '▁engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'en',\n", " 'score': 0.1367849,\n", " 'index': 190,\n", " 'word': '▁the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'el',\n", " 'score': 0.12032364,\n", " 'index': 191,\n", " 'word': '▁public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'el',\n", " 'score': 0.11247768,\n", " 'index': 192,\n", " 'word': '▁with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'el',\n", " 'score': 0.10595911,\n", " 'index': 193,\n", " 'word': '▁NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'en',\n", " 'score': 0.15294623,\n", " 'index': 194,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'en',\n", " 'score': 0.14347178,\n", " 'index': 195,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'el',\n", " 'score': 0.111513235,\n", " 'index': 196,\n", " 'word': '▁finding',\n", " 'start': 808,\n", " 'end': 815},\n", " {'entity': 'en',\n", " 'score': 0.13011272,\n", " 'index': 197,\n", " 'word': 's',\n", " 'start': 815,\n", " 'end': 816},\n", " {'entity': 'en',\n", " 'score': 0.1234625,\n", " 'index': 198,\n", " 'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'en',\n", " 'score': 0.13722092,\n", " 'index': 199,\n", " 'word': '▁and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'en',\n", " 'score': 0.15018424,\n", " 'index': 
200,\n", " 'word': '▁at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'en',\n", " 'score': 0.12469069,\n", " 'index': 201,\n", " 'word': 'rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'en',\n", " 'score': 0.13173658,\n", " 'index': 202,\n", " 'word': 'ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'el',\n", " 'score': 0.089053564,\n", " 'index': 203,\n", " 'word': '▁attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'el',\n", " 'score': 0.1162944,\n", " 'index': 204,\n", " 'word': '▁to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'el',\n", " 'score': 0.08081766,\n", " 'index': 205,\n", " 'word': '▁Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'en',\n", " 'score': 0.16521907,\n", " 'index': 206,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'en',\n", " 'score': 0.13379842,\n", " 'index': 207,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'el',\n", " 'score': 0.12595442,\n", " 'index': 208,\n", " 'word': '▁and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'en',\n", " 'score': 0.14046155,\n", " 'index': 209,\n", " 'word': '▁it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'el',\n", " 'score': 0.13404858,\n", " 'index': 210,\n", " 'word': '▁did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'en',\n", " 'score': 0.16926748,\n", " 'index': 211,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'en',\n", " 'score': 0.16880788,\n", " 'index': 212,\n", " 'word': '▁The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'en',\n", " 'score': 0.13957304,\n", " 'index': 213,\n", " 'word': '▁face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'el',\n", " 'score': 0.13834707,\n", " 'index': 214,\n", " 'word': '▁on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'en',\n", " 'score': 0.09909718,\n", " 'index': 215,\n", " 'word': '▁Mars',\n", " 'start': 
875,\n", " 'end': 879},\n", " {'entity': 'el',\n", " 'score': 0.12846456,\n", " 'index': 216,\n", " 'word': '▁soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'el',\n", " 'score': 0.11763253,\n", " 'index': 217,\n", " 'word': '▁became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'en',\n", " 'score': 0.1485309,\n", " 'index': 218,\n", " 'word': '▁a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'el',\n", " 'score': 0.09540079,\n", " 'index': 219,\n", " 'word': '▁pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'el',\n", " 'score': 0.099501655,\n", " 'index': 220,\n", " 'word': '▁icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 'en',\n", " 'score': 0.13926134,\n", " 'index': 221,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'en',\n", " 'score': 0.12448082,\n", " 'index': 222,\n", " 'word': '▁shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'en',\n", " 'score': 0.14647718,\n", " 'index': 223,\n", " 'word': '▁in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'en',\n", " 'score': 0.10021849,\n", " 'index': 224,\n", " 'word': '▁movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'en',\n", " 'score': 0.12970805,\n", " 'index': 225,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'en',\n", " 'score': 0.15426567,\n", " 'index': 226,\n", " 'word': '▁appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'en',\n", " 'score': 0.14625426,\n", " 'index': 227,\n", " 'word': '▁in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'el',\n", " 'score': 0.11217332,\n", " 'index': 228,\n", " 'word': '▁books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'el',\n", " 'score': 0.117495194,\n", " 'index': 229,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'el',\n", " 'score': 0.12197139,\n", " 'index': 230,\n", " 'word': '▁magazine',\n", " 'start': 939,\n", " 'end': 
947},\n", " {'entity': 'en',\n", " 'score': 0.17034268,\n", " 'index': 231,\n", " 'word': 's',\n", " 'start': 947,\n", " 'end': 948},\n", " {'entity': 'el',\n", " 'score': 0.12415104,\n", " 'index': 232,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'el',\n", " 'score': 0.12769704,\n", " 'index': 233,\n", " 'word': '▁radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'el',\n", " 'score': 0.12205196,\n", " 'index': 234,\n", " 'word': '▁talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'el',\n", " 'score': 0.12856682,\n", " 'index': 235,\n", " 'word': '▁shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'en',\n", " 'score': 0.13193049,\n", " 'index': 236,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'en',\n", " 'score': 0.15642078,\n", " 'index': 237,\n", " 'word': '▁and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'en',\n", " 'score': 0.15124984,\n", " 'index': 238,\n", " 'word': '▁ha',\n", " 'start': 972,\n", " 'end': 974},\n", " {'entity': 'en',\n", " 'score': 0.1550119,\n", " 'index': 239,\n", " 'word': 'un',\n", " 'start': 974,\n", " 'end': 976},\n", " {'entity': 'en',\n", " 'score': 0.14671806,\n", " 'index': 240,\n", " 'word': 'ted',\n", " 'start': 976,\n", " 'end': 979},\n", " {'entity': 'en',\n", " 'score': 0.1287589,\n", " 'index': 241,\n", " 'word': '▁gro',\n", " 'start': 980,\n", " 'end': 983},\n", " {'entity': 'el',\n", " 'score': 0.12542117,\n", " 'index': 242,\n", " 'word': 'cer',\n", " 'start': 983,\n", " 'end': 986},\n", " {'entity': 'en',\n", " 'score': 0.16864416,\n", " 'index': 243,\n", " 'word': 'y',\n", " 'start': 986,\n", " 'end': 987},\n", " {'entity': 'el',\n", " 'score': 0.11950577,\n", " 'index': 244,\n", " 'word': '▁store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'el',\n", " 'score': 0.12966508,\n", " 'index': 245,\n", " 'word': '▁check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'el',\n", " 'score': 
0.12523231,\n", " 'index': 246,\n", " 'word': 'out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'el',\n", " 'score': 0.10310095,\n", " 'index': 247,\n", " 'word': '▁lines',\n", " 'start': 1003,\n", " 'end': 1008},\n", " {'entity': 'el',\n", " 'score': 0.13300535,\n", " 'index': 248,\n", " 'word': '▁for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'el',\n", " 'score': 0.116698794,\n", " 'index': 249,\n", " 'word': '▁25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'el',\n", " 'score': 0.13448924,\n", " 'index': 250,\n", " 'word': '▁years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'en',\n", " 'score': 0.16032621,\n", " 'index': 251,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'el',\n", " 'score': 0.104690254,\n", " 'index': 252,\n", " 'word': '▁Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'el',\n", " 'score': 0.12012685,\n", " 'index': 253,\n", " 'word': '▁people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'el',\n", " 'score': 0.12279296,\n", " 'index': 254,\n", " 'word': '▁thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'en',\n", " 'score': 0.14976098,\n", " 'index': 255,\n", " 'word': '▁the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'el',\n", " 'score': 0.10598802,\n", " 'index': 256,\n", " 'word': '▁natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'el',\n", " 'score': 0.10722617,\n", " 'index': 257,\n", " 'word': '▁land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'en',\n", " 'score': 0.11747365,\n", " 'index': 258,\n", " 'word': 'form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'el',\n", " 'score': 0.12782969,\n", " 'index': 259,\n", " 'word': '▁was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'el',\n", " 'score': 0.13294275,\n", " 'index': 260,\n", " 'word': '▁evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 
'en',\n", " 'score': 0.16003017,\n", " 'index': 261,\n", " 'word': '▁of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'en',\n", " 'score': 0.13127412,\n", " 'index': 262,\n", " 'word': '▁life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'el',\n", " 'score': 0.14569594,\n", " 'index': 263,\n", " 'word': '▁on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'el',\n", " 'score': 0.086975396,\n", " 'index': 264,\n", " 'word': '▁Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'en',\n", " 'score': 0.13438605,\n", " 'index': 265,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'en',\n", " 'score': 0.15218768,\n", " 'index': 266,\n", " 'word': '▁and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'el',\n", " 'score': 0.11441549,\n", " 'index': 267,\n", " 'word': '▁that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'el',\n", " 'score': 0.10406643,\n", " 'index': 268,\n", " 'word': '▁us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'el',\n", " 'score': 0.1157087,\n", " 'index': 269,\n", " 'word': '▁scientist',\n", " 'start': 1106,\n", " 'end': 1115},\n", " {'entity': 'en',\n", " 'score': 0.16390227,\n", " 'index': 270,\n", " 'word': 's',\n", " 'start': 1115,\n", " 'end': 1116},\n", " {'entity': 'el',\n", " 'score': 0.12261817,\n", " 'index': 271,\n", " 'word': '▁wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'en',\n", " 'score': 0.17378253,\n", " 'index': 272,\n", " 'word': '▁to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'en',\n", " 'score': 0.14640202,\n", " 'index': 273,\n", " 'word': '▁hi',\n", " 'start': 1127,\n", " 'end': 1129},\n", " {'entity': 'en',\n", " 'score': 0.14813536,\n", " 'index': 274,\n", " 'word': 'de',\n", " 'start': 1129,\n", " 'end': 1131},\n", " {'entity': 'en',\n", " 'score': 0.13482781,\n", " 'index': 275,\n", " 'word': '▁it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 
'en',\n", " 'score': 0.1534762,\n", " 'index': 276,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'el',\n", " 'score': 0.12395833,\n", " 'index': 277,\n", " 'word': '▁but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'el',\n", " 'score': 0.0911575,\n", " 'index': 278,\n", " 'word': '▁really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " {'entity': 'el',\n", " 'score': 0.12752935,\n", " 'index': 279,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'el',\n", " 'score': 0.13102081,\n", " 'index': 280,\n", " 'word': '▁the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'el',\n", " 'score': 0.09118256,\n", " 'index': 281,\n", " 'word': '▁defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'el',\n", " 'score': 0.12889816,\n", " 'index': 282,\n", " 'word': 's',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'el',\n", " 'score': 0.131198,\n", " 'index': 283,\n", " 'word': '▁of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'el',\n", " 'score': 0.13177402,\n", " 'index': 284,\n", " 'word': '▁the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'el',\n", " 'score': 0.0976391,\n", " 'index': 285,\n", " 'word': '▁NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'ja',\n", " 'score': 0.077623695,\n", " 'index': 286,\n", " 'word': '▁budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'el',\n", " 'score': 0.10235032,\n", " 'index': 287,\n", " 'word': '▁wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'el',\n", " 'score': 0.11458244,\n", " 'index': 288,\n", " 'word': '▁there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'el',\n", " 'score': 0.12292923,\n", " 'index': 289,\n", " 'word': '▁was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'en',\n", " 'score': 0.13890299,\n", " 'index': 290,\n", " 'word': '▁an',\n", " 'start': 1196,\n", " 'end': 1198},\n", " {'entity': 
'el',\n", " 'score': 0.12614945,\n", " 'index': 291,\n", " 'word': 'cient',\n", " 'start': 1198,\n", " 'end': 1203},\n", " {'entity': 'el',\n", " 'score': 0.098485425,\n", " 'index': 292,\n", " 'word': '▁civiliza',\n", " 'start': 1204,\n", " 'end': 1212},\n", " {'entity': 'en',\n", " 'score': 0.13695684,\n", " 'index': 293,\n", " 'word': 'tion',\n", " 'start': 1212,\n", " 'end': 1216},\n", " {'entity': 'el',\n", " 'score': 0.12930857,\n", " 'index': 294,\n", " 'word': '▁on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'en',\n", " 'score': 0.091095164,\n", " 'index': 295,\n", " 'word': '▁Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'en',\n", " 'score': 0.15858984,\n", " 'index': 296,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'en',\n", " 'score': 0.12306447,\n", " 'index': 297,\n", " 'word': '▁We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'el',\n", " 'score': 0.12401779,\n", " 'index': 298,\n", " 'word': '▁decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'en',\n", " 'score': 0.1532955,\n", " 'index': 299,\n", " 'word': '▁to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'el',\n", " 'score': 0.10732532,\n", " 'index': 300,\n", " 'word': '▁take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'el',\n", " 'score': 0.10733517,\n", " 'index': 301,\n", " 'word': '▁another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'el',\n", " 'score': 0.101071455,\n", " 'index': 302,\n", " 'word': '▁shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'el',\n", " 'score': 0.11504756,\n", " 'index': 303,\n", " 'word': '▁just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'en',\n", " 'score': 0.16051938,\n", " 'index': 304,\n", " 'word': '▁to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'en',\n", " 'score': 0.1526192,\n", " 'index': 305,\n", " 'word': '▁make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " 
{'entity': 'el',\n", " 'score': 0.1414837,\n", " 'index': 306,\n", " 'word': '▁sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'el',\n", " 'score': 0.13466269,\n", " 'index': 307,\n", " 'word': '▁we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'en',\n", " 'score': 0.15096475,\n", " 'index': 308,\n", " 'word': '▁were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'en',\n", " 'score': 0.1969613,\n", " 'index': 309,\n", " 'word': 'n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'en',\n", " 'score': 0.15865912,\n", " 'index': 310,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'en',\n", " 'score': 0.16572025,\n", " 'index': 311,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'el',\n", " 'score': 0.12193232,\n", " 'index': 312,\n", " 'word': '▁wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'el',\n", " 'score': 0.10861691,\n", " 'index': 313,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'el',\n", " 'score': 0.12936045,\n", " 'index': 314,\n", " 'word': '▁on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'en',\n", " 'score': 0.10274368,\n", " 'index': 315,\n", " 'word': '▁April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'el',\n", " 'score': 0.11178034,\n", " 'index': 316,\n", " 'word': '▁5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'en',\n", " 'score': 0.12274597,\n", " 'index': 317,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'en',\n", " 'score': 0.13640046,\n", " 'index': 318,\n", " 'word': '▁1998.',\n", " 'start': 1306,\n", " 'end': 1311},\n", " {'entity': 'el',\n", " 'score': 0.09738847,\n", " 'index': 319,\n", " 'word': '▁Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'ar',\n", " 'score': 0.0863802,\n", " 'index': 320,\n", " 'word': '▁Malin',\n", " 'start': 1320,\n", " 'end': 1325},\n", " {'entity': 
'el',\n", " 'score': 0.13613664,\n", " 'index': 321,\n", " 'word': '▁and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'el',\n", " 'score': 0.12056482,\n", " 'index': 322,\n", " 'word': '▁his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'el',\n", " 'score': 0.084936745,\n", " 'index': 323,\n", " 'word': '▁Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'en',\n", " 'score': 0.09353773,\n", " 'index': 324,\n", " 'word': '▁Or',\n", " 'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'el',\n", " 'score': 0.09600473,\n", " 'index': 325,\n", " 'word': 'bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'en',\n", " 'score': 0.13925415,\n", " 'index': 326,\n", " 'word': 'er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'el',\n", " 'score': 0.106588826,\n", " 'index': 327,\n", " 'word': '▁camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'el',\n", " 'score': 0.11866151,\n", " 'index': 328,\n", " 'word': '▁team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'el',\n", " 'score': 0.1267877,\n", " 'index': 329,\n", " 'word': '▁took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'en',\n", " 'score': 0.1520372,\n", " 'index': 330,\n", " 'word': '▁a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'el',\n", " 'score': 0.11103296,\n", " 'index': 331,\n", " 'word': '▁picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'en',\n", " 'score': 0.1478278,\n", " 'index': 332,\n", " 'word': '▁that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'el',\n", " 'score': 0.13980234,\n", " 'index': 333,\n", " 'word': '▁was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'el',\n", " 'score': 0.14334293,\n", " 'index': 334,\n", " 'word': '▁ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'el',\n", " 'score': 0.17359938,\n", " 'index': 335,\n", " 'word': '▁times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " 
{'entity': 'el',\n", " 'score': 0.115307085,\n", " 'index': 336,\n", " 'word': '▁sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'el',\n", " 'score': 0.13487983,\n", " 'index': 337,\n", " 'word': 'er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'el',\n", " 'score': 0.15190518,\n", " 'index': 338,\n", " 'word': '▁than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'en',\n", " 'score': 0.16202395,\n", " 'index': 339,\n", " 'word': '▁the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'el',\n", " 'score': 0.12381832,\n", " 'index': 340,\n", " 'word': '▁original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'el',\n", " 'score': 0.07887648,\n", " 'index': 341,\n", " 'word': '▁Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'el',\n", " 'score': 0.1085552,\n", " 'index': 342,\n", " 'word': '▁photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'el',\n", " 'score': 0.12933005,\n", " 'index': 343,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'el',\n", " 'score': 0.13211969,\n", " 'index': 344,\n", " 'word': '▁reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'en',\n", " 'score': 0.13877197,\n", " 'index': 345,\n", " 'word': 'ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'el',\n", " 'score': 0.14274132,\n", " 'index': 346,\n", " 'word': '▁a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'el',\n", " 'score': 0.12289959,\n", " 'index': 347,\n", " 'word': '▁natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'el',\n", " 'score': 0.11685172,\n", " 'index': 348,\n", " 'word': '▁land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'el',\n", " 'score': 0.1232755,\n", " 'index': 349,\n", " 'word': 'form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'el',\n", " 'score': 0.12781556,\n", " 'index': 350,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 
1463},\n", " {'entity': 'en',\n", " 'score': 0.15072589,\n", " 'index': 351,\n", " 'word': '▁which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'el',\n", " 'score': 0.11072004,\n", " 'index': 352,\n", " 'word': '▁meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'el',\n", " 'score': 0.12505238,\n", " 'index': 353,\n", " 'word': '▁no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'ar',\n", " 'score': 0.08642981,\n", " 'index': 354,\n", " 'word': '▁alien',\n", " 'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'pl',\n", " 'score': 0.075424016,\n", " 'index': 355,\n", " 'word': '▁monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'en',\n", " 'score': 0.15359755,\n", " 'index': 356,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'el',\n", " 'score': 0.11350483,\n", " 'index': 357,\n", " 'word': '▁\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'el',\n", " 'score': 0.11290481,\n", " 'index': 358,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'el',\n", " 'score': 0.12361978,\n", " 'index': 359,\n", " 'word': '▁that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'el',\n", " 'score': 0.11042566,\n", " 'index': 360,\n", " 'word': '▁picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'en',\n", " 'score': 0.13758177,\n", " 'index': 361,\n", " 'word': '▁wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'en',\n", " 'score': 0.14231738,\n", " 'index': 362,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'en',\n", " 'score': 0.1495189,\n", " 'index': 363,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'el',\n", " 'score': 0.13938442,\n", " 'index': 364,\n", " 'word': '▁very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'el',\n", " 'score': 0.13038687,\n", " 'index': 365,\n", " 'word': '▁clear',\n", " 'start': 1525,\n", " 
'end': 1530},\n", " {'entity': 'el',\n", " 'score': 0.144432,\n", " 'index': 366,\n", " 'word': '▁at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'el',\n", " 'score': 0.15075718,\n", " 'index': 367,\n", " 'word': '▁all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'el',\n", " 'score': 0.12753487,\n", " 'index': 368,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'el',\n", " 'score': 0.13230762,\n", " 'index': 369,\n", " 'word': '▁which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'el',\n", " 'score': 0.12133482,\n", " 'index': 370,\n", " 'word': '▁could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'el',\n", " 'score': 0.15801905,\n", " 'index': 371,\n", " 'word': '▁mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'el',\n", " 'score': 0.09326559,\n", " 'index': 372,\n", " 'word': '▁alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'el',\n", " 'score': 0.112567335,\n", " 'index': 373,\n", " 'word': '▁mark',\n", " 'start': 1562,\n", " 'end': 1566},\n", " {'entity': 'el',\n", " 'score': 0.13536157,\n", " 'index': 374,\n", " 'word': 'ings',\n", " 'start': 1566,\n", " 'end': 1570},\n", " {'entity': 'el',\n", " 'score': 0.13931376,\n", " 'index': 375,\n", " 'word': '▁were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'el',\n", " 'score': 0.1297942,\n", " 'index': 376,\n", " 'word': '▁hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'el',\n", " 'score': 0.12882504,\n", " 'index': 377,\n", " 'word': '▁by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'en',\n", " 'score': 0.13760869,\n", " 'index': 378,\n", " 'word': '▁ha',\n", " 'start': 1586,\n", " 'end': 1588},\n", " {'entity': 'en',\n", " 'score': 0.11820038,\n", " 'index': 379,\n", " 'word': 'ze',\n", " 'start': 1588,\n", " 'end': 1590},\n", " {'entity': 'en',\n", " 'score': 0.12755689,\n", " 'index': 380,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 
'end': 1591},\n", " {'entity': 'el',\n", " 'score': 0.08038798,\n", " 'index': 381,\n", " 'word': '▁Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'el',\n", " 'score': 0.106813006,\n", " 'index': 382,\n", " 'word': '▁no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'el',\n", " 'score': 0.13041495,\n", " 'index': 383,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'el',\n", " 'score': 0.100906804,\n", " 'index': 384,\n", " 'word': '▁yes',\n", " 'start': 1601,\n", " 'end': 1604},\n", " {'entity': 'el',\n", " 'score': 0.11085139,\n", " 'index': 385,\n", " 'word': '▁that',\n", " 'start': 1605,\n", " 'end': 1609},\n", " {'entity': 'el',\n", " 'score': 0.090841085,\n", " 'index': 386,\n", " 'word': '▁rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'el',\n", " 'score': 0.0912762,\n", " 'index': 387,\n", " 'word': '▁started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'en',\n", " 'score': 0.15546727,\n", " 'index': 388,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'el',\n", " 'score': 0.12896377,\n", " 'index': 389,\n", " 'word': '▁but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'el',\n", " 'score': 0.14137326,\n", " 'index': 390,\n", " 'word': '▁to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'el',\n", " 'score': 0.1308083,\n", " 'index': 391,\n", " 'word': '▁prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'el',\n", " 'score': 0.120038114,\n", " 'index': 392,\n", " 'word': '▁them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'el',\n", " 'score': 0.12024542,\n", " 'index': 393,\n", " 'word': '▁wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'el',\n", " 'score': 0.13746382,\n", " 'index': 394,\n", " 'word': '▁on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'el',\n", " 'score': 0.10328479,\n", " 'index': 395,\n", " 'word': '▁April',\n", " 'start': 
1652,\n", " 'end': 1657},\n", " {'entity': 'el',\n", " 'score': 0.11870165,\n", " 'index': 396,\n", " 'word': '▁8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'el',\n", " 'score': 0.1222689,\n", " 'index': 397,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'el',\n", " 'score': 0.109291695,\n", " 'index': 398,\n", " 'word': '▁2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'el',\n", " 'score': 0.13815217,\n", " 'index': 399,\n", " 'word': '▁we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'el',\n", " 'score': 0.14033255,\n", " 'index': 400,\n", " 'word': '▁decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'en',\n", " 'score': 0.15627232,\n", " 'index': 401,\n", " 'word': '▁to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'el',\n", " 'score': 0.11807532,\n", " 'index': 402,\n", " 'word': '▁take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'el',\n", " 'score': 0.11256976,\n", " 'index': 403,\n", " 'word': '▁another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'el',\n", " 'score': 0.10331523,\n", " 'index': 404,\n", " 'word': '▁picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'el',\n", " 'score': 0.13425903,\n", " 'index': 405,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'el',\n", " 'score': 0.1378113,\n", " 'index': 406,\n", " 'word': '▁making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'el',\n", " 'score': 0.15436757,\n", " 'index': 407,\n", " 'word': '▁sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'en',\n", " 'score': 0.15380576,\n", " 'index': 408,\n", " 'word': '▁it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'el',\n", " 'score': 0.13963144,\n", " 'index': 409,\n", " 'word': '▁was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'en',\n", " 'score': 0.14155927,\n", " 'index': 410,\n", " 'word': '▁a',\n", " 'start': 
1721,\n", " 'end': 1722},\n", " {'entity': 'en',\n", " 'score': 0.09928071,\n", " 'index': 411,\n", " 'word': '▁cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'el',\n", " 'score': 0.12012173,\n", " 'index': 412,\n", " 'word': 'less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'el',\n", " 'score': 0.10342507,\n", " 'index': 413,\n", " 'word': '▁summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'el',\n", " 'score': 0.12684572,\n", " 'index': 414,\n", " 'word': '▁day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'en',\n", " 'score': 0.1442453,\n", " 'index': 415,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'el',\n", " 'score': 0.087297305,\n", " 'index': 416,\n", " 'word': '▁Malin',\n", " 'start': 1745,\n", " 'end': 1750},\n", " {'entity': 'en',\n", " 'score': 0.13008924,\n", " 'index': 417,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'en',\n", " 'score': 0.13482796,\n", " 'index': 418,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'el',\n", " 'score': 0.114518456,\n", " 'index': 419,\n", " 'word': '▁team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'el',\n", " 'score': 0.12293864,\n", " 'index': 420,\n", " 'word': '▁capture',\n", " 'start': 1758,\n", " 'end': 1765},\n", " {'entity': 'el',\n", " 'score': 0.13779704,\n", " 'index': 421,\n", " 'word': 'd',\n", " 'start': 1765,\n", " 'end': 1766},\n", " {'entity': 'el',\n", " 'score': 0.1461829,\n", " 'index': 422,\n", " 'word': '▁an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'el',\n", " 'score': 0.11995733,\n", " 'index': 423,\n", " 'word': '▁amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': 'el',\n", " 'score': 0.10462855,\n", " 'index': 424,\n", " 'word': '▁photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'el',\n", " 'score': 0.11723955,\n", " 'index': 425,\n", " 'word': '▁using',\n", " 
'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'en',\n", " 'score': 0.14316602,\n", " 'index': 426,\n", " 'word': '▁the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'el',\n", " 'score': 0.113886684,\n", " 'index': 427,\n", " 'word': '▁camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'el',\n", " 'score': 0.117555104,\n", " 'index': 428,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'el',\n", " 'score': 0.12543674,\n", " 'index': 429,\n", " 'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'el',\n", " 'score': 0.08178501,\n", " 'index': 430,\n", " 'word': '▁absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'ar',\n", " 'score': 0.084494546,\n", " 'index': 431,\n", " 'word': '▁maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'ar',\n", " 'score': 0.07143905,\n", " 'index': 432,\n", " 'word': '▁revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'en',\n", " 'score': 0.12196331,\n", " 'index': 433,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'el',\n", " 'score': 0.10553519,\n", " 'index': 434,\n", " 'word': '▁With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'el',\n", " 'score': 0.12206387,\n", " 'index': 435,\n", " 'word': '▁this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'el',\n", " 'score': 0.10974171,\n", " 'index': 436,\n", " 'word': '▁camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'el',\n", " 'score': 0.12890373,\n", " 'index': 437,\n", " 'word': '▁you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'el',\n", " 'score': 0.13222878,\n", " 'index': 438,\n", " 'word': '▁can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'el',\n", " 'score': 0.10733961,\n", " 'index': 439,\n", " 'word': '▁discern',\n", " 'start': 1857,\n", " 'end': 1864},\n", " {'entity': 'el',\n", " 'score': 0.11150802,\n", " 'index': 440,\n", " 
'word': '▁things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'el',\n", " 'score': 0.12971912,\n", " 'index': 441,\n", " 'word': '▁in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'el',\n", " 'score': 0.14733568,\n", " 'index': 442,\n", " 'word': '▁a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " {'entity': 'el',\n", " 'score': 0.11389056,\n", " 'index': 443,\n", " 'word': '▁digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'el',\n", " 'score': 0.11318145,\n", " 'index': 444,\n", " 'word': '▁image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'el',\n", " 'score': 0.13167904,\n", " 'index': 445,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'el',\n", " 'score': 0.11767233,\n", " 'index': 446,\n", " 'word': '▁3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'el',\n", " 'score': 0.18132198,\n", " 'index': 447,\n", " 'word': '▁times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'el',\n", " 'score': 0.14084792,\n", " 'index': 448,\n", " 'word': '▁bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'el',\n", " 'score': 0.14768834,\n", " 'index': 449,\n", " 'word': '▁than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'en',\n", " 'score': 0.14468607,\n", " 'index': 450,\n", " 'word': '▁the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'el',\n", " 'score': 0.105042085,\n", " 'index': 451,\n", " 'word': '▁pixel',\n", " 'start': 1916,\n", " 'end': 1921},\n", " {'entity': 'el',\n", " 'score': 0.13860421,\n", " 'index': 452,\n", " 'word': '▁size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'el',\n", " 'score': 0.12235864,\n", " 'index': 453,\n", " 'word': '▁which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'el',\n", " 'score': 0.155492,\n", " 'index': 454,\n", " 'word': '▁means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'el',\n", " 'score': 0.119502716,\n", " 'index': 
455,\n", " 'word': '▁if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'en',\n", " 'score': 0.14724067,\n", " 'index': 456,\n", " 'word': '▁there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'el',\n", " 'score': 0.13590516,\n", " 'index': 457,\n", " 'word': '▁were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'el',\n", " 'score': 0.12619598,\n", " 'index': 458,\n", " 'word': '▁any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'el',\n", " 'score': 0.11781136,\n", " 'index': 459,\n", " 'word': '▁sign',\n", " 'start': 1957,\n", " 'end': 1961},\n", " {'entity': 'en',\n", " 'score': 0.14715949,\n", " 'index': 460,\n", " 'word': 's',\n", " 'start': 1961,\n", " 'end': 1962},\n", " {'entity': 'en',\n", " 'score': 0.1466556,\n", " 'index': 461,\n", " 'word': '▁of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'el',\n", " 'score': 0.123666875,\n", " 'index': 462,\n", " 'word': '▁life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'el',\n", " 'score': 0.12355319,\n", " 'index': 463,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'el',\n", " 'score': 0.14050364,\n", " 'index': 464,\n", " 'word': '▁you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'el',\n", " 'score': 0.13710876,\n", " 'index': 465,\n", " 'word': '▁could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'el',\n", " 'score': 0.14485125,\n", " 'index': 466,\n", " 'word': '▁easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'el',\n", " 'score': 0.13146858,\n", " 'index': 467,\n", " 'word': '▁see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'el',\n", " 'score': 0.13325945,\n", " 'index': 468,\n", " 'word': '▁what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'el',\n", " 'score': 0.14085865,\n", " 'index': 469,\n", " 'word': '▁they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'el',\n", " 'score': 0.13667713,\n", " 'index': 
470,\n", " 'word': '▁were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'en',\n", " 'score': 0.13597657,\n", " 'index': 471,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 'el',\n", " 'score': 0.109606914,\n", " 'index': 472,\n", " 'word': '▁What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'en',\n", " 'score': 0.14616685,\n", " 'index': 473,\n", " 'word': '▁the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'el',\n", " 'score': 0.10915576,\n", " 'index': 474,\n", " 'word': '▁picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'el',\n", " 'score': 0.12212968,\n", " 'index': 475,\n", " 'word': '▁showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'el',\n", " 'score': 0.11899673,\n", " 'index': 476,\n", " 'word': '▁was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'el',\n", " 'score': 0.12273235,\n", " 'index': 477,\n", " 'word': '▁the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'el',\n", " 'score': 0.08423174,\n", " 'index': 478,\n", " 'word': '▁but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'el',\n", " 'score': 0.098441444,\n", " 'index': 479,\n", " 'word': 'te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'ar',\n", " 'score': 0.08634294,\n", " 'index': 480,\n", " 'word': '▁or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'el',\n", " 'score': 0.084831,\n", " 'index': 481,\n", " 'word': '▁mesa',\n", " 'start': 2050,\n", " 'end': 2054},\n", " {'entity': 'el',\n", " 'score': 0.115126304,\n", " 'index': 482,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'el',\n", " 'score': 0.12282577,\n", " 'index': 483,\n", " 'word': '▁which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'el',\n", " 'score': 0.11013289,\n", " 'index': 484,\n", " 'word': '▁are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'el',\n", " 'score': 0.10554905,\n", " 'index': 
485,\n", " 'word': '▁land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'el',\n", " 'score': 0.10868882,\n", " 'index': 486,\n", " 'word': 'form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'el',\n", " 'score': 0.118700296,\n", " 'index': 487,\n", " 'word': 's',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'el',\n", " 'score': 0.093575306,\n", " 'index': 488,\n", " 'word': '▁common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'el',\n", " 'score': 0.11244102,\n", " 'index': 489,\n", " 'word': '▁around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'en',\n", " 'score': 0.14871824,\n", " 'index': 490,\n", " 'word': '▁the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'el',\n", " 'score': 0.11480587,\n", " 'index': 491,\n", " 'word': '▁American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'el',\n", " 'score': 0.10203579,\n", " 'index': 492,\n", " 'word': '▁West',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': 'en',\n", " 'score': 0.15275316,\n", " 'index': 493,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"papluca/xlm-roberta-base-language-detection\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"papluca/xlm-roberta-base-language-detection\")\n", "nlp = pipeline(\"token-classification\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results\n" ] }, { "cell_type": "code", "execution_count": 125, "id": "f76c3b4a-b798-4518-bae7-978c0420fbc3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "ar 8\n", "el 305\n", "en 178\n", "ja 1\n", "pl 1\n", "dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entityscore
entity
ar80.084216
el3050.120293
en1780.143087
ja10.077624
pl10.075424
\n", "
" ], "text/plain": [ " entity score\n", "entity \n", "ar 8 0.084216\n", "el 305 0.120293\n", "en 178 0.143087\n", "ja 1 0.077624\n", "pl 1 0.075424" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "with open(\"49 paplucaxlm-roberta-base-language-detection.json\", encoding='utf-8') as f:\n", " data = json.load(f)\n", "aux=obtener_dataframe(data)\n", "aux\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()\n", "\n", "aux.groupby(['entity']) \\\n", " .agg({'entity':'size', 'score':'mean'}) \\\n", " #.rename(columns={'text':'count','sent':'mean_sent'}) \\\n", " #.reset_index()" ] }, { "cell_type": "markdown", "id": "148d03c8-bed4-4325-8f59-1fe8438a5615", "metadata": {}, "source": [ "## 50 mbruton/spa_en_mBERT" ] }, { "cell_type": "code", "execution_count": 109, "id": "87a8abbb-47a5-4f90-8655-d3dd9f8ade18", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'r0:arg1|tem',\n", " 'score': 0.29541913,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'r0:root',\n", " 'score': 0.4976404,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'r0:root',\n", " 'score': 0.9618234,\n", " 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'r1:arg1|tem',\n", " 'score': 0.7678349,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'r1:root',\n", " 'score': 0.6855395,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'r1:arg2|atr',\n", " 'score': 0.93614143,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'r2:arg2|ben',\n", " 'score': 0.5274996,\n", " 'index': 17,\n", " 'word': 'me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'r2:arg1|pat',\n", " 'score': 0.36818993,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 
72,\n", " 'end': 77},\n", " {'entity': 'r4:arg1|tem',\n", " 'score': 0.4968183,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'r3:root',\n", " 'score': 0.5518882,\n", " 'index': 30,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'r4:arg2|atr',\n", " 'score': 0.7494934,\n", " 'index': 31,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'r6:arg1|tem',\n", " 'score': 0.108391255,\n", " 'index': 33,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'r6:root',\n", " 'score': 0.52937764,\n", " 'index': 34,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'r5:arg1|tem',\n", " 'score': 0.44316128,\n", " 'index': 35,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'r7:arg1|tem',\n", " 'score': 0.27223623,\n", " 'index': 42,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'r7:root',\n", " 'score': 0.75835687,\n", " 'index': 44,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'r7:arg0|agt',\n", " 'score': 0.40680537,\n", " 'index': 45,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.17703968,\n", " 'index': 60,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'r7:arg0|agt',\n", " 'score': 0.26997983,\n", " 'index': 62,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'r9:root',\n", " 'score': 0.22341944,\n", " 'index': 64,\n", " 'word': 'ci',\n", " 'start': 264,\n", " 'end': 266},\n", " {'entity': 'r9:root',\n", " 'score': 0.18148886,\n", " 'index': 65,\n", " 'word': '##rc',\n", " 'start': 266,\n", " 'end': 268},\n", " {'entity': 'r9:root',\n", " 'score': 0.21423559,\n", " 'index': 66,\n", " 'word': '##ling',\n", " 'start': 268,\n", " 'end': 272},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 
0.16891205,\n", " 'index': 68,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'r9:root',\n", " 'score': 0.1664749,\n", " 'index': 71,\n", " 'word': '##pping',\n", " 'start': 288,\n", " 'end': 293},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.09886724,\n", " 'index': 72,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 0.16721433,\n", " 'index': 75,\n", " 'word': 'it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'r9:root',\n", " 'score': 0.13445386,\n", " 'index': 76,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.07838027,\n", " 'index': 81,\n", " 'word': 'like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.063725084,\n", " 'index': 82,\n", " 'word': '##ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.041758526,\n", " 'index': 89,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'r10:root',\n", " 'score': 0.058230672,\n", " 'index': 90,\n", " 'word': 'figure',\n", " 'start': 370,\n", " 'end': 376},\n", " {'entity': 'r10:root',\n", " 'score': 0.06219335,\n", " 'index': 91,\n", " 'word': '##d',\n", " 'start': 376,\n", " 'end': 377},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.035768963,\n", " 'index': 94,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'r10:root',\n", " 'score': 0.04963572,\n", " 'index': 95,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.03322343,\n", " 'index': 103,\n", " 'word': 'around',\n", " 'start': 428,\n", " 'end': 434},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.038057458,\n", " 'index': 110,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'r10:root',\n", " 'score': 0.0465628,\n", " 'index': 111,\n", " 'word': 
'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'r10:root',\n", " 'score': 0.032235544,\n", " 'index': 112,\n", " 'word': 'sh',\n", " 'start': 462,\n", " 'end': 464},\n", " {'entity': 'r10:root',\n", " 'score': 0.0322965,\n", " 'index': 113,\n", " 'word': '##adow',\n", " 'start': 464,\n", " 'end': 468},\n", " {'entity': 'r10:root',\n", " 'score': 0.034283824,\n", " 'index': 114,\n", " 'word': '##s',\n", " 'start': 468,\n", " 'end': 469},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.030779108,\n", " 'index': 115,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'r10:root',\n", " 'score': 0.046148807,\n", " 'index': 116,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'r10:root',\n", " 'score': 0.035636976,\n", " 'index': 117,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'r10:root',\n", " 'score': 0.043233275,\n", " 'index': 118,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'r10:root',\n", " 'score': 0.031466834,\n", " 'index': 119,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'r10:root',\n", " 'score': 0.02962308,\n", " 'index': 130,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'r10:root',\n", " 'score': 0.03362092,\n", " 'index': 132,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'r10:root',\n", " 'score': 0.04920107,\n", " 'index': 133,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'r10:root',\n", " 'score': 0.035394482,\n", " 'index': 135,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'r10:root',\n", " 'score': 0.032336798,\n", " 'index': 136,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'r10:root',\n", " 'score': 0.045922153,\n", " 'index': 139,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 
'r10:root',\n", " 'score': 0.035783116,\n", " 'index': 142,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'r10:root',\n", " 'score': 0.05480558,\n", " 'index': 143,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'r10:root',\n", " 'score': 0.030482573,\n", " 'index': 144,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'r10:root',\n", " 'score': 0.03615033,\n", " 'index': 148,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'r10:root',\n", " 'score': 0.05102072,\n", " 'index': 149,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'r10:root',\n", " 'score': 0.032937527,\n", " 'index': 152,\n", " 'word': 'rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'r10:root',\n", " 'score': 0.03535375,\n", " 'index': 153,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.047777873,\n", " 'index': 154,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.0540176,\n", " 'index': 155,\n", " 'word': 'just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'r10:root',\n", " 'score': 0.06253041,\n", " 'index': 156,\n", " 'word': 'res',\n", " 'start': 642,\n", " 'end': 645},\n", " {'entity': 'r10:root',\n", " 'score': 0.060185175,\n", " 'index': 157,\n", " 'word': '##emble',\n", " 'start': 645,\n", " 'end': 650},\n", " {'entity': 'r10:root',\n", " 'score': 0.07396809,\n", " 'index': 158,\n", " 'word': '##d',\n", " 'start': 650,\n", " 'end': 651},\n", " {'entity': 'r10:root',\n", " 'score': 0.03149155,\n", " 'index': 161,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'r10:root',\n", " 'score': 0.06597179,\n", " 'index': 170,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.031629615,\n", " 'index': 171,\n", " 
'word': 'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.060809337,\n", " 'index': 176,\n", " 'word': 'We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'r10:root',\n", " 'score': 0.07441239,\n", " 'index': 178,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.054461885,\n", " 'index': 179,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.05075702,\n", " 'index': 181,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'r10:root',\n", " 'score': 0.052541845,\n", " 'index': 182,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.057083007,\n", " 'index': 183,\n", " 'word': 'it',\n", " 'start': 752,\n", " 'end': 754},\n", " {'entity': 'r10:root',\n", " 'score': 0.05793729,\n", " 'index': 185,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.042124726,\n", " 'index': 188,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'r10:root',\n", " 'score': 0.06260392,\n", " 'index': 190,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.04784509,\n", " 'index': 192,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'r10:root',\n", " 'score': 0.055024274,\n", " 'index': 200,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'r10:root',\n", " 'score': 0.05680457,\n", " 'index': 201,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'r10:root',\n", " 'score': 0.054766916,\n", " 'index': 202,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.03638437,\n", " 'index': 203,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 
839},\n", " {'entity': 'r10:root',\n", " 'score': 0.02923426,\n", " 'index': 204,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.03866812,\n", " 'index': 209,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'r10:root',\n", " 'score': 0.04308056,\n", " 'index': 210,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.04369669,\n", " 'index': 213,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.024818424,\n", " 'index': 216,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'r10:root',\n", " 'score': 0.05038194,\n", " 'index': 217,\n", " 'word': 'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04120699,\n", " 'index': 220,\n", " 'word': 'i',\n", " 'start': 898,\n", " 'end': 899},\n", " {'entity': 'r10:root',\n", " 'score': 0.05299865,\n", " 'index': 223,\n", " 'word': 'shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.030761532,\n", " 'index': 224,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'r9:root',\n", " 'score': 0.082203425,\n", " 'index': 227,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'r10:root',\n", " 'score': 0.024252772,\n", " 'index': 228,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'r7:argM|tmp',\n", " 'score': 0.08839744,\n", " 'index': 247,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'r7:arg0|agt',\n", " 'score': 0.252022,\n", " 'index': 252,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'r8:root',\n", " 'score': 0.31143436,\n", " 'index': 253,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 
0.17507401,\n", " 'index': 256,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'r7:arg1|tem',\n", " 'score': 0.18424971,\n", " 'index': 257,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'r8:root',\n", " 'score': 0.26880854,\n", " 'index': 258,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.124720916,\n", " 'index': 259,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'r7:arg0|agt',\n", " 'score': 0.44198242,\n", " 'index': 268,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 1116},\n", " {'entity': 'r7:root',\n", " 'score': 0.71973616,\n", " 'index': 269,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'r7:root',\n", " 'score': 0.65194285,\n", " 'index': 271,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'r7:arg1|pat',\n", " 'score': 0.10310135,\n", " 'index': 272,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'r5:arg0|agt',\n", " 'score': 0.37039754,\n", " 'index': 278,\n", " 'word': 'defender',\n", " 'start': 1152,\n", " 'end': 1160},\n", " {'entity': 'r4:arg0|agt',\n", " 'score': 0.1837553,\n", " 'index': 279,\n", " 'word': '##s',\n", " 'start': 1160,\n", " 'end': 1161},\n", " {'entity': 'r4:arg2|atr',\n", " 'score': 0.1392899,\n", " 'index': 285,\n", " 'word': 'there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'r5:root',\n", " 'score': 0.16138868,\n", " 'index': 286,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'r4:arg1|tem',\n", " 'score': 0.25208256,\n", " 'index': 288,\n", " 'word': 'civili',\n", " 'start': 1204,\n", " 'end': 1210},\n", " {'entity': 'r5:arg1|tem',\n", " 'score': 0.2960885,\n", " 'index': 289,\n", " 'word': '##zation',\n", " 'start': 1210,\n", " 'end': 1216},\n", " {'entity': 'r8:arg0|agt',\n", " 'score': 
0.08236764,\n", " 'index': 293,\n", " 'word': 'We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'r9:root',\n", " 'score': 0.085176714,\n", " 'index': 294,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'r9:root',\n", " 'score': 0.09762525,\n", " 'index': 296,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'r8:arg1|pat',\n", " 'score': 0.038402967,\n", " 'index': 298,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'r9:root',\n", " 'score': 0.07043696,\n", " 'index': 301,\n", " 'word': 'make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'r10:root',\n", " 'score': 0.034738,\n", " 'index': 302,\n", " 'word': 'sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.03654291,\n", " 'index': 303,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'r10:root',\n", " 'score': 0.054115873,\n", " 'index': 304,\n", " 'word': 'were',\n", " 'start': 1279,\n", " 'end': 1283},\n", " {'entity': 'r10:root',\n", " 'score': 0.044537026,\n", " 'index': 305,\n", " 'word': '##n',\n", " 'start': 1283,\n", " 'end': 1284},\n", " {'entity': 'r10:root',\n", " 'score': 0.043383468,\n", " 'index': 307,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.050688915,\n", " 'index': 308,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.030045124,\n", " 'index': 310,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.031087063,\n", " 'index': 316,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'r10:root',\n", " 'score': 0.037183713,\n", " 'index': 317,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'r10:root',\n", " 'score': 0.037349515,\n", " 'index': 318,\n", 
" 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'r10:root',\n", " 'score': 0.038758732,\n", " 'index': 325,\n", " 'word': 'team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'r10:root',\n", " 'score': 0.05292711,\n", " 'index': 326,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'r10:root',\n", " 'score': 0.031445753,\n", " 'index': 328,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.039939146,\n", " 'index': 329,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'r10:root',\n", " 'score': 0.05810041,\n", " 'index': 330,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.04002515,\n", " 'index': 333,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'r10:root',\n", " 'score': 0.03314902,\n", " 'index': 334,\n", " 'word': '##er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'r10:root',\n", " 'score': 0.04910727,\n", " 'index': 341,\n", " 'word': 'reveal',\n", " 'start': 1434,\n", " 'end': 1440},\n", " {'entity': 'r10:root',\n", " 'score': 0.05482279,\n", " 'index': 342,\n", " 'word': '##ing',\n", " 'start': 1440,\n", " 'end': 1443},\n", " {'entity': 'r10:root',\n", " 'score': 0.033936784,\n", " 'index': 345,\n", " 'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.034768764,\n", " 'index': 348,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'r10:root',\n", " 'score': 0.049932696,\n", " 'index': 349,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'r10:root',\n", " 'score': 0.035454,\n", " 'index': 352,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.033968676,\n", " 'index': 357,\n", " 'word': 'picture',\n", 
" 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'r10:root',\n", " 'score': 0.053387593,\n", " 'index': 358,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'r10:root',\n", " 'score': 0.041064143,\n", " 'index': 360,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.046866965,\n", " 'index': 362,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.036013737,\n", " 'index': 363,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.046605524,\n", " 'index': 366,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'r10:root',\n", " 'score': 0.051832046,\n", " 'index': 368,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'r10:root',\n", " 'score': 0.03425833,\n", " 'index': 369,\n", " 'word': 'alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'r10:root',\n", " 'score': 0.036131877,\n", " 'index': 370,\n", " 'word': 'marking',\n", " 'start': 1562,\n", " 'end': 1569},\n", " {'entity': 'r10:root',\n", " 'score': 0.040733457,\n", " 'index': 371,\n", " 'word': '##s',\n", " 'start': 1569,\n", " 'end': 1570},\n", " {'entity': 'r10:root',\n", " 'score': 0.056199037,\n", " 'index': 372,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'r10:root',\n", " 'score': 0.04729384,\n", " 'index': 373,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'r10:root',\n", " 'score': 0.02660689,\n", " 'index': 374,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'r10:root',\n", " 'score': 0.0323595,\n", " 'index': 381,\n", " 'word': 'ye',\n", " 'start': 1601,\n", " 'end': 1603},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.037608866,\n", " 'index': 384,\n", " 'word': 'rum',\n", " 'start': 1610,\n", " 'end': 
1613},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.042922124,\n", " 'index': 385,\n", " 'word': '##or',\n", " 'start': 1613,\n", " 'end': 1615},\n", " {'entity': 'r10:root',\n", " 'score': 0.04341532,\n", " 'index': 386,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'r10:root',\n", " 'score': 0.054916393,\n", " 'index': 390,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.03691343,\n", " 'index': 391,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'r8:arg2|atr',\n", " 'score': 0.030148128,\n", " 'index': 392,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'r8:argM|adv',\n", " 'score': 0.028092569,\n", " 'index': 393,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.039598208,\n", " 'index': 398,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'r10:root',\n", " 'score': 0.051345967,\n", " 'index': 399,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'r10:root',\n", " 'score': 0.049982574,\n", " 'index': 401,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'r10:root',\n", " 'score': 0.03342623,\n", " 'index': 403,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'r10:root',\n", " 'score': 0.044696685,\n", " 'index': 405,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'r10:root',\n", " 'score': 0.03436369,\n", " 'index': 406,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.03859569,\n", " 'index': 407,\n", " 'word': 'it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'r10:root',\n", " 'score': 0.0494223,\n", " 'index': 408,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " 
{'entity': 'r10:root',\n", " 'score': 0.02929864,\n", " 'index': 412,\n", " 'word': 'summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'r10:root',\n", " 'score': 0.029290752,\n", " 'index': 413,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.035252254,\n", " 'index': 415,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'r8:arg1|tem',\n", " 'score': 0.036368903,\n", " 'index': 416,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'r10:root',\n", " 'score': 0.03298566,\n", " 'index': 419,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'r10:root',\n", " 'score': 0.047498554,\n", " 'index': 420,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'r10:root',\n", " 'score': 0.03272342,\n", " 'index': 424,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'r10:root',\n", " 'score': 0.03630522,\n", " 'index': 425,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'r10:root',\n", " 'score': 0.030974206,\n", " 'index': 434,\n", " 'word': 'With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'r10:root',\n", " 'score': 0.0339595,\n", " 'index': 437,\n", " 'word': 'you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'r10:root',\n", " 'score': 0.040549394,\n", " 'index': 438,\n", " 'word': 'can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'r10:root',\n", " 'score': 0.042945094,\n", " 'index': 439,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'r10:root',\n", " 'score': 0.04496943,\n", " 'index': 440,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'r10:root',\n", " 'score': 0.033708826,\n", " 'index': 441,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'r10:root',\n", " 
'score': 0.03150944,\n", " 'index': 442,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'r10:root',\n", " 'score': 0.033492185,\n", " 'index': 455,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'r10:root',\n", " 'score': 0.044837367,\n", " 'index': 456,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'r10:root',\n", " 'score': 0.03930233,\n", " 'index': 458,\n", " 'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'r10:root',\n", " 'score': 0.046380974,\n", " 'index': 459,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'r10:root',\n", " 'score': 0.03521819,\n", " 'index': 461,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'r10:root',\n", " 'score': 0.03437573,\n", " 'index': 465,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'r10:root',\n", " 'score': 0.040590625,\n", " 'index': 466,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'r10:root',\n", " 'score': 0.0304144,\n", " 'index': 467,\n", " 'word': 'easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'r10:root',\n", " 'score': 0.048064638,\n", " 'index': 468,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'r10:root',\n", " 'score': 0.032679703,\n", " 'index': 469,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'r10:root',\n", " 'score': 0.037320852,\n", " 'index': 470,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'r10:root',\n", " 'score': 0.047912642,\n", " 'index': 471,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'r10:root',\n", " 'score': 0.034035392,\n", " 'index': 473,\n", " 'word': 'What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'r10:root',\n", " 'score': 0.036057852,\n", " 'index': 
475,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'r10:root',\n", " 'score': 0.047634307,\n", " 'index': 476,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'r10:root',\n", " 'score': 0.04636057,\n", " 'index': 477,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'r10:root',\n", " 'score': 0.03571351,\n", " 'index': 479,\n", " 'word': 'but',\n", " 'start': 2041,\n", " 'end': 2044},\n", " {'entity': 'r10:root',\n", " 'score': 0.035763755,\n", " 'index': 480,\n", " 'word': '##te',\n", " 'start': 2044,\n", " 'end': 2046},\n", " {'entity': 'r10:root',\n", " 'score': 0.03430853,\n", " 'index': 484,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'r10:root',\n", " 'score': 0.04987236,\n", " 'index': 485,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'r10:root',\n", " 'score': 0.03454835,\n", " 'index': 486,\n", " 'word': 'land',\n", " 'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'r10:root',\n", " 'score': 0.035414834,\n", " 'index': 487,\n", " 'word': '##form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'r10:root',\n", " 'score': 0.038371794,\n", " 'index': 488,\n", " 'word': '##s',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'r10:root',\n", " 'score': 0.03423538,\n", " 'index': 489,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'r10:root',\n", " 'score': 0.032777835,\n", " 'index': 490,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089}]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Token-classification run with the mbruton/spa_en_mBERT checkpoint.\n", "from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n", "\n", "# Tokenizer and model weights are loaded from the same repository id.\n", "tokenizer = AutoTokenizer.from_pretrained(\"mbruton/spa_en_mBERT\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"mbruton/spa_en_mBERT\")\n",
"nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# `text` is not defined in this cell; it must already exist in the kernel.\n", "# The bare name on the last line makes Jupyter display the per-token predictions.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 110, "id": "b46505fd-da7c-40a5-b666-29e67291ffc7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "r0:arg1|tem 1\n", "r0:root 2\n", "r10:root 113\n", "r1:arg1|tem 1\n", "r1:arg2|atr 1\n", "r1:root 1\n", "r2:arg1|pat 1\n", "r2:arg2|ben 1\n", "r3:root 1\n", "r4:arg0|agt 1\n", "r4:arg1|tem 2\n", "r4:arg2|atr 2\n", "r5:arg0|agt 1\n", "r5:arg1|tem 2\n", "r5:root 1\n", "r6:arg1|tem 1\n", "r6:root 1\n", "r7:arg0|agt 4\n", "r7:arg1|pat 2\n", "r7:arg1|tem 2\n", "r7:argM|tmp 1\n", "r7:root 3\n", "r8:arg0|agt 3\n", "r8:arg1|pat 3\n", "r8:arg1|tem 29\n", "r8:arg2|atr 7\n", "r8:argM|adv 7\n", "r8:root 2\n", "r9:root 9\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "r0:arg1|tem you 1\n", "r0:root 1\n", " re 1\n", "r10:root ##adow 1\n", " ##ct 1\n", " ##d 2\n", " ##emble 1\n", " ##er 1\n", " ##ern 1\n", " ##form 1\n", " ##ing 1\n", " ##n 2\n", " ##rra 1\n", " ##s 3\n", " ##te 1\n", " Mali 1\n", " What 1\n", " With 1\n", " alien 1\n", " announced 1\n", " are 1\n", " around 1\n", " at 1\n", " be 1\n", " became 1\n", " but 1\n", " by 1\n", " can 1\n", " captured 1\n", " common 1\n", " could 1\n", " day 1\n", " decided 1\n", " did 1\n", " disc 1\n", " easily 1\n", " engage 1\n", " figure 1\n", " for 1\n", " formation 1\n", " formed 1\n", " had 1\n", " head 1\n", " hidden 1\n", " image 1\n", " in 2\n", " it 2\n", " land 2\n", " later 1\n", " like 1\n", " look 1\n", " made 2\n", " making 1\n", " marking 1\n", " mean 1\n", " means 1\n", " meant 1\n", " monument 1\n", " photo 1\n", " picture 3\n", " prove 1\n", " res 1\n", " reveal 1\n", " revealed 1\n", " rock 1\n", " see 2\n", " sh 1\n", " shot 1\n", " showed 1\n", " signs 1\n", " started 1\n", " summer 1\n", " sure 3\n", " t 2\n", " take 1\n", " team 2\n", " there 1\n", " they 1\n", " things 1\n", " 
thought 1\n", " to 1\n", " took 1\n", " using 1\n", " was 5\n", " wasn 1\n", " we 2\n", " were 4\n", " what 1\n", " which 2\n", " ye 1\n", " you 2\n", "r1:arg1|tem you 1\n", "r1:arg2|atr able 1\n", "r1:root be 1\n", "r2:arg1|pat story 1\n", "r2:arg2|ben me 1\n", "r3:root is 1\n", "r4:arg0|agt ##s 1\n", "r4:arg1|tem civili 1\n", " which 1\n", "r4:arg2|atr evidence 1\n", " there 1\n", "r5:arg0|agt defender 1\n", "r5:arg1|tem ##zation 1\n", " life 1\n", "r5:root was 1\n", "r6:arg1|tem there 1\n", "r6:root is 1\n", "r7:arg0|agt by 1\n", " people 1\n", " scientists 1\n", " spacecraft 1\n", "r7:arg1|pat it 1\n", " planet 1\n", "r7:arg1|tem ##form 1\n", " face 1\n", "r7:argM|tmp for 1\n", "r7:root created 1\n", " hide 1\n", " wanted 1\n", "r8:arg0|agt Viking 1\n", " We 1\n", " it 1\n", "r8:arg1|pat like 1\n", " photos 1\n", " shot 1\n", "r8:arg1|tem ##n 1\n", " ##ness 1\n", " ##or 1\n", " Mali 1\n", " Michael 1\n", " We 1\n", " attention 1\n", " by 1\n", " face 1\n", " it 5\n", " land 1\n", " one 1\n", " picture 1\n", " public 1\n", " rum 1\n", " scientists 1\n", " that 3\n", " them 1\n", " we 3\n", " which 2\n", "r8:arg2|atr clear 1\n", " evidence 1\n", " i 1\n", " sharp 1\n", " way 1\n", " wrong 2\n", "r8:argM|adv around 1\n", " at 1\n", " in 1\n", " just 1\n", " on 2\n", " soon 1\n", "r8:root thought 1\n", " was 1\n", "r9:root ##ling 1\n", " ##pping 1\n", " ##rc 1\n", " appeared 1\n", " ci 1\n", " decided 1\n", " make 1\n", " spotted 1\n", " take 1\n", "dtype: int64" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file for model 50 (presumably the saved predictions of the run above - TODO confirm) and tabulate it.\n", "import json  # imported in-cell so this cell does not rely on another cell having bound `json`\n", "\n", "with open(\"50 mbrutonspa_en_mBERT.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "# obtener_dataframe flattens every record in `data` into one DataFrame row.\n", "aux = obtener_dataframe(data)\n", "\n", "# Row counts per `entity` value (printed), then per (`entity`, `word`) pair (cell output).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "9bd22ec6-6192-4ff8-b482-b7fdbfac312c", "metadata": {}, "source": [ "## 51 BSC-LT/roberta_model_for_anonimization" ] }, { "cell_type": "code", 
"execution_count": 132, "id": "6cd4a769-e99c-453f-83c6-447c4c69ec1b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'S-ORG',\n", " 'score': 0.9931144,\n", " 'index': 10,\n", " 'word': 'ĠNASA',\n", " 'start': 38,\n", " 'end': 42},\n", " {'entity': 'S-OTH',\n", " 'score': 0.9904603,\n", " 'index': 21,\n", " 'word': 'ĠCara',\n", " 'start': 94,\n", " 'end': 98},\n", " {'entity': 'S-LOC',\n", " 'score': 0.9779458,\n", " 'index': 23,\n", " 'word': 'ĠMarte',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'S-LOC',\n", " 'score': 0.9959857,\n", " 'index': 34,\n", " 'word': 'ĠMarte',\n", " 'start': 156,\n", " 'end': 161},\n", " {'entity': 'B-OTH',\n", " 'score': 0.9917984,\n", " 'index': 57,\n", " 'word': 'ĠVik',\n", " 'start': 269,\n", " 'end': 272},\n", " {'entity': 'I-OTH',\n", " 'score': 0.9757009,\n", " 'index': 58,\n", " 'word': 'ing',\n", " 'start': 272,\n", " 'end': 275},\n", " {'entity': 'E-OTH',\n", " 'score': 0.9817541,\n", " 'index': 59,\n", " 'word': 'Ġ1',\n", " 'start': 276,\n", " 'end': 277},\n", " {'entity': 'S-LOC',\n", " 'score': 0.99622846,\n", " 'index': 96,\n", " 'word': 'ĠC',\n", " 'start': 503,\n", " 'end': 504},\n", " {'entity': 'S-ORG',\n", " 'score': 0.990908,\n", " 'index': 182,\n", " 'word': 'ĠNASA',\n", " 'start': 919,\n", " 'end': 923},\n", " {'entity': 'S-LOC',\n", " 'score': 0.9959258,\n", " 'index': 188,\n", " 'word': 'ĠMarte',\n", " 'start': 947,\n", " 'end': 952},\n", " {'entity': 'S-LOC',\n", " 'score': 0.99050736,\n", " 'index': 199,\n", " 'word': 'ĠMarte',\n", " 'start': 979,\n", " 'end': 984},\n", " {'entity': 'S-LOC',\n", " 'score': 0.9929755,\n", " 'index': 255,\n", " 'word': 'ĠMarte',\n", " 'start': 1252,\n", " 'end': 1257},\n", " {'entity': 'S-ORG',\n", " 'score': 0.9937528,\n", " 'index': 275,\n", " 'word': 'ĠNASA',\n", " 'start': 1357,\n", " 'end': 1361},\n", " {'entity': 'S-LOC',\n", " 'score': 0.9968929,\n", " 'index': 284,\n", " 'word': 'ĠMarte',\n", " 'start': 1412,\n", " 'end': 1417},\n", " 
{'entity': 'B-PER',\n", " 'score': 0.99851614,\n", " 'index': 308,\n", " 'word': 'ĠMichael',\n", " 'start': 1515,\n", " 'end': 1522},\n", " {'entity': 'E-PER',\n", " 'score': 0.9982216,\n", " 'index': 309,\n", " 'word': 'ĠMal',\n", " 'start': 1523,\n", " 'end': 1526},\n", " {'entity': 'E-PER',\n", " 'score': 0.9815487,\n", " 'index': 310,\n", " 'word': 'in',\n", " 'start': 1526,\n", " 'end': 1528},\n", " {'entity': 'B-ORG',\n", " 'score': 0.79048795,\n", " 'index': 317,\n", " 'word': 'ĠMars',\n", " 'start': 1554,\n", " 'end': 1558},\n", " {'entity': 'E-ORG',\n", " 'score': 0.807663,\n", " 'index': 318,\n", " 'word': 'ĠOr',\n", " 'start': 1559,\n", " 'end': 1561},\n", " {'entity': 'E-ORG',\n", " 'score': 0.6876012,\n", " 'index': 319,\n", " 'word': 'bi',\n", " 'start': 1561,\n", " 'end': 1563},\n", " {'entity': 'E-ORG',\n", " 'score': 0.60779643,\n", " 'index': 320,\n", " 'word': 'ter',\n", " 'start': 1563,\n", " 'end': 1566},\n", " {'entity': 'S-OTH',\n", " 'score': 0.9860491,\n", " 'index': 336,\n", " 'word': 'ĠVik',\n", " 'start': 1655,\n", " 'end': 1658},\n", " {'entity': 'S-PER',\n", " 'score': 0.9582744,\n", " 'index': 427,\n", " 'word': 'ĠMal',\n", " 'start': 2082,\n", " 'end': 2085}]" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Spanish version of the essay. This rebinds the notebook-level `text`, so cells executed afterwards see the Spanish string.\n", "text=\"\"\"Entonces, si eres un científico de la NASA, deberías poder contarme toda la historia sobre la Cara en Marte, que obviamente es evidencia de que hay vida en Marte y que la cara fue creada por extraterrestres, ¿correcto?\" No. Hace veinticinco años, nuestra nave espacial Viking 1 estaba dando vueltas alrededor del planeta, tomando fotografías, cuando detectó la sombra de un rostro humano. Los científicos estadounidenses descubrieron que se trataba simplemente de otra mesa marciana, común alrededor de Cydonia, solo que ésta tenía sombras. Eso lo hizo parecer un faraón egipcio. 
Muy pocos días después, revelamos la imagen para que todos la vieran, y nos aseguramos de notar que era una enorme formación rocosa que simplemente se parecía a una cabeza y un rostro humanos, pero todo era. formado por sombras Sólo lo anunciamos porque pensamos que sería una buena manera de involucrar al público con los hallazgos de la NASA y atraer la atención a Marte, y así fue.\n", "\n", "El rostro de Marte pronto se convirtió en un ícono pop; filmada en películas, apareció en libros, revistas, programas de radio y en las colas de las cajas de las tiendas de comestibles durante 25 años. Algunas personas pensaron que la forma natural del relieve era evidencia de vida en Marte, y que los científicos queríamos ocultarla, pero en realidad, los defensores del presupuesto de la NASA desearían que hubiera una civilización antigua en Marte. Decidimos tomar otra foto solo para asegurarnos de no estar equivocados, el 5 de abril de 1998. Michael Malin y su equipo de cámara de Mars Orbiter tomaron una fotografía que era diez veces más nítida que las fotografías originales del Viking, revelando una forma de relieve natural, que No significaba ningún monumento alienígena. \"Pero esa imagen no era muy clara en absoluto, lo que podría significar que las marcas alienígenas estaban ocultas por la neblina\" Bueno, no, sí, ese rumor comenzó, pero para demostrar que estaban equivocados, el 8 de abril de 2001 decidimos tomar otra fotografía, asegurándonos de que Era un día de verano sin nubes. El equipo de Malin capturó una fotografía asombrosa utilizando la revolución máxima absoluta de la cámara. Con esta cámara puedes discernir cosas en una imagen digital, 3 veces más grande que el tamaño de un píxel, lo que significa que si hubiera señales de vida, podrías ver fácilmente cuáles eran. 
Lo que la imagen mostraba era la colina o mesa, que son accidentes geográficos comunes en el oeste americano.\"\"\"\n", "\n", "# Imported in-cell so this cell runs on its own instead of relying on an import made by another cell.\n", "from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n", "\n", "# NER pass over the Spanish `text` above; tokenizer and model come from the same repository id.\n", "tokenizer = AutoTokenizer.from_pretrained(\"BSC-LT/roberta_model_for_anonimization\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"BSC-LT/roberta_model_for_anonimization\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# The bare name on the last line makes Jupyter display the per-token predictions.\n", "ner_results = nlp(text)\n", "ner_results" ] }, { "cell_type": "code", "execution_count": 133, "id": "b46cee5f-5626-4ef8-b5de-8c57f5a142a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "entity\n", "B-ORG 1\n", "B-OTH 1\n", "B-PER 1\n", "E-ORG 3\n", "E-OTH 1\n", "E-PER 2\n", "I-OTH 1\n", "S-LOC 7\n", "S-ORG 3\n", "S-OTH 2\n", "S-PER 1\n", "dtype: int64\n" ] }, { "data": { "text/plain": [ "entity word \n", "B-ORG ĠMars 1\n", "B-OTH ĠVik 1\n", "B-PER ĠMichael 1\n", "E-ORG bi 1\n", " ter 1\n", " ĠOr 1\n", "E-OTH Ġ1 1\n", "E-PER in 1\n", " ĠMal 1\n", "I-OTH ing 1\n", "S-LOC ĠC 1\n", " ĠMarte 6\n", "S-ORG ĠNASA 3\n", "S-OTH ĠCara 1\n", " ĠVik 1\n", "S-PER ĠMal 1\n", "dtype: int64" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the JSON file for model 51 (presumably the saved predictions of the run above - TODO confirm) and tabulate it.\n", "import json  # imported in-cell so this cell does not rely on another cell having bound `json`\n", "\n", "with open(\"51 BSC-LTroberta_model_for_anonimization.json\", encoding='utf-8') as f:\n", "    data = json.load(f)\n", "\n", "# obtener_dataframe flattens every record in `data` into one DataFrame row.\n", "aux = obtener_dataframe(data)\n", "\n", "# Row counts per `entity` value (printed), then per (`entity`, `word`) pair (cell output).\n", "print(aux.groupby(['entity']).size())\n", "aux.groupby(['entity', 'word']).size()" ] }, { "cell_type": "markdown", "id": "7c1f2159-3e79-4d4b-a4dc-b690d563763d", "metadata": {}, "source": [ "## 52 aymurai/anonymizer-beto-cased-flair" ] }, { "cell_type": "code", "execution_count": 111, "id": "432508a1-4f16-494f-9ae2-6a206d2dbe2e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "ename": "OSError", "evalue": "aymurai/anonymizer-beto-cased-flair does not appear to have a file named config.json. Checkout 'https://huggingface.co/aymurai/anonymizer-beto-cased-flair/tree/main' for available files.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 304\u001b[0m response\u001b[38;5;241m.\u001b[39mraise_for_status()\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\requests\\models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n", "\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/aymurai/anonymizer-beto-cased-flair/resolve/main/config.json", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:399\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, 
_raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 398\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[1;32m--> 399\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[0;32m 400\u001b[0m path_or_repo_id,\n\u001b[0;32m 401\u001b[0m filename,\n\u001b[0;32m 402\u001b[0m subfolder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(subfolder) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m subfolder,\n\u001b[0;32m 403\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 404\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 405\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 406\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 407\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 408\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 409\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 410\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 411\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 412\u001b[0m )\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, 
has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1221\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[1;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[0;32m 1220\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_cache_dir(\n\u001b[0;32m 1222\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[0;32m 1223\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 1224\u001b[0m \u001b[38;5;66;03m# File info\u001b[39;00m\n\u001b[0;32m 1225\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1226\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1227\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1228\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1229\u001b[0m \u001b[38;5;66;03m# HTTP info\u001b[39;00m\n\u001b[0;32m 1230\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1231\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1232\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1233\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1234\u001b[0m \u001b[38;5;66;03m# Additional options\u001b[39;00m\n\u001b[0;32m 1235\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1236\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 
1237\u001b[0m )\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1282\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[1;34m(cache_dir, repo_id, filename, repo_type, revision, headers, proxies, etag_timeout, endpoint, local_files_only, force_download)\u001b[0m\n\u001b[0;32m 1280\u001b[0m \u001b[38;5;66;03m# Try to get metadata (etag, commit_hash, url, size) from the server.\u001b[39;00m\n\u001b[0;32m 1281\u001b[0m \u001b[38;5;66;03m# If we can't, a HEAD request error is returned.\u001b[39;00m\n\u001b[1;32m-> 1282\u001b[0m (url_to_download, etag, commit_hash, expected_size, head_call_error) \u001b[38;5;241m=\u001b[39m _get_metadata_or_catch_error(\n\u001b[0;32m 1283\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1284\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1285\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1286\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1287\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1288\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1289\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1290\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1291\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1292\u001b[0m storage_folder\u001b[38;5;241m=\u001b[39mstorage_folder,\n\u001b[0;32m 1293\u001b[0m relative_filename\u001b[38;5;241m=\u001b[39mrelative_filename,\n\u001b[0;32m 1294\u001b[0m )\n\u001b[0;32m 1296\u001b[0m \u001b[38;5;66;03m# etag can be None for several reasons:\u001b[39;00m\n\u001b[0;32m 1297\u001b[0m \u001b[38;5;66;03m# 1. we passed local_files_only.\u001b[39;00m\n\u001b[0;32m 1298\u001b[0m \u001b[38;5;66;03m# 2. 
we don't have a connection\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[38;5;66;03m# If the specified revision is a commit hash, look inside \"snapshots\".\u001b[39;00m\n\u001b[0;32m 1305\u001b[0m \u001b[38;5;66;03m# If the specified revision is a branch or tag, look inside \"refs\".\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1722\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[1;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[0;32m 1721\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1722\u001b[0m metadata \u001b[38;5;241m=\u001b[39m get_hf_file_metadata(url\u001b[38;5;241m=\u001b[39murl, proxies\u001b[38;5;241m=\u001b[39mproxies, timeout\u001b[38;5;241m=\u001b[39metag_timeout, headers\u001b[38;5;241m=\u001b[39mheaders)\n\u001b[0;32m 1723\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1645\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[1;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[0;32m 
1644\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[1;32m-> 1645\u001b[0m r \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 1646\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHEAD\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 1647\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 1648\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1649\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 1650\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 1651\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1652\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 1653\u001b[0m )\n\u001b[0;32m 1654\u001b[0m hf_raise_for_status(r)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:372\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[1;32m--> 372\u001b[0m response \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 373\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[0;32m 374\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 375\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 376\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams,\n\u001b[0;32m 377\u001b[0m )\n\u001b[0;32m 379\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[0;32m 380\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:396\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, 
follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 395\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[1;32m--> 396\u001b[0m hf_raise_for_status(response)\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:315\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 314\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 315\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EntryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "\u001b[1;31mEntryNotFoundError\u001b[0m: 404 Client Error. 
(Request ID: Root=1-6689e043-48997c467113ccac7d10140e;22177459-16a6-4735-8391-d445af8a31ee)\n\nEntry Not Found for url: https://huggingface.co/aymurai/anonymizer-beto-cased-flair/resolve/main/config.json.", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[111], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maymurai/anonymizer-beto-cased-flair\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maymurai/anonymizer-beto-cased-flair\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config 
\u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:934\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 931\u001b[0m trust_remote_code \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrust_remote_code\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 932\u001b[0m code_revision \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcode_revision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m--> 934\u001b[0m config_dict, unused_kwargs \u001b[38;5;241m=\u001b[39m PretrainedConfig\u001b[38;5;241m.\u001b[39mget_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 935\u001b[0m has_remote_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict 
\u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoConfig\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 936\u001b[0m has_local_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;129;01min\u001b[39;00m CONFIG_MAPPING\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:632\u001b[0m, in \u001b[0;36mPretrainedConfig.get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 630\u001b[0m original_kwargs \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(kwargs)\n\u001b[0;32m 631\u001b[0m \u001b[38;5;66;03m# Get config dict associated with the base config file\u001b[39;00m\n\u001b[1;32m--> 632\u001b[0m config_dict, kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict:\n\u001b[0;32m 634\u001b[0m original_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:689\u001b[0m, in 
\u001b[0;36mPretrainedConfig._get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 685\u001b[0m configuration_file \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_configuration_file\u001b[39m\u001b[38;5;124m\"\u001b[39m, CONFIG_NAME) \u001b[38;5;28;01mif\u001b[39;00m gguf_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m gguf_file\n\u001b[0;32m 687\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 688\u001b[0m \u001b[38;5;66;03m# Load from local folder or from cache or download from model Hub and cache\u001b[39;00m\n\u001b[1;32m--> 689\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m cached_file(\n\u001b[0;32m 690\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 691\u001b[0m configuration_file,\n\u001b[0;32m 692\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 693\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 694\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 695\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 696\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 697\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 698\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 699\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 700\u001b[0m subfolder\u001b[38;5;241m=\u001b[39msubfolder,\n\u001b[0;32m 701\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 702\u001b[0m )\n\u001b[0;32m 703\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[0;32m 704\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m:\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Raise any environment 
error raise by `cached_file`. It will have a helpful error message adapted to\u001b[39;00m\n\u001b[0;32m 706\u001b[0m \u001b[38;5;66;03m# the original exception.\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:453\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m revision \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 452\u001b[0m revision \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 453\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not appear to have a file named \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_filename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Checkout \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/tree/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available files.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 456\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 458\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n", "\u001b[1;31mOSError\u001b[0m: aymurai/anonymizer-beto-cased-flair does not appear to have a file named config.json. Checkout 'https://huggingface.co/aymurai/anonymizer-beto-cased-flair/tree/main' for available files." 
] } ], "source": [ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "\n", "# Hub repo id shared by the tokenizer and the model loaders.\n", "model_id = \"aymurai/anonymizer-beto-cased-flair\"\n", "\n", "# Loading this repo raised OSError when the notebook was last run: it has no config.json.\n", "# NOTE(review): the '-flair' suffix suggests a Flair checkpoint, not a transformers one - confirm on the Hub.\n", "# Catch the failure so Restart & Run All can continue past this model, and reset ner_results\n", "# so a stale result from the previous model's cell is never mistaken for this model's output.\n", "try:\n", "    tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "    model = AutoModelForTokenClassification.from_pretrained(model_id)\n", "    nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "    ner_results = nlp(text)\n", "except OSError as err:\n", "    print(f\"Could not load {model_id} with transformers: {err}\")\n", "    ner_results = []\n", "\n", "ner_results" ] }, { "cell_type": "code", "execution_count": null, "id": "51143fc8-92cc-4c5f-88c5-e295452c3b6a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "8b203e13-8095-4a04-b490-78677533942b", "metadata": {}, "source": [ "## 53 google-bert/bert-large-cased-whole-word-masking" ] }, { "cell_type": "code", "execution_count": 113, "id": "f03e9cdc-f9d1-4eca-b059-89e3955e22c5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-large-cased-whole-word-masking and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/plain": [ "[{'entity': 'LABEL_1',\n", " 'score': 0.7333207,\n", " 'index': 1,\n", " 'word': 'So',\n", " 'start': 0,\n", " 'end': 2},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7897263,\n", " 'index': 2,\n", " 'word': ',',\n", " 'start': 2,\n", " 'end': 3},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61105645,\n", " 'index': 3,\n", " 'word': 'if',\n", " 'start': 4,\n", " 'end': 6},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6179384,\n", " 'index': 4,\n", " 'word': 'you',\n", " 'start': 7,\n", " 'end': 10},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51219755,\n", " 'index': 5,\n", " 'word': \"'\",\n", " 'start': 10,\n", " 'end': 11},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7380822,\n", 
" 'index': 6,\n", " 'word': 're',\n", " 'start': 11,\n", " 'end': 13},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65777117,\n", " 'index': 7,\n", " 'word': 'a',\n", " 'start': 14,\n", " 'end': 15},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.53536695,\n", " 'index': 8,\n", " 'word': 'NASA',\n", " 'start': 16,\n", " 'end': 20},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5658676,\n", " 'index': 9,\n", " 'word': 'scientist',\n", " 'start': 21,\n", " 'end': 30},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7322473,\n", " 'index': 10,\n", " 'word': ',',\n", " 'start': 30,\n", " 'end': 31},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.63136214,\n", " 'index': 11,\n", " 'word': 'you',\n", " 'start': 32,\n", " 'end': 35},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6426238,\n", " 'index': 12,\n", " 'word': 'should',\n", " 'start': 36,\n", " 'end': 42},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5245657,\n", " 'index': 13,\n", " 'word': 'be',\n", " 'start': 43,\n", " 'end': 45},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6180293,\n", " 'index': 14,\n", " 'word': 'able',\n", " 'start': 46,\n", " 'end': 50},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5129229,\n", " 'index': 15,\n", " 'word': 'to',\n", " 'start': 51,\n", " 'end': 53},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52398765,\n", " 'index': 16,\n", " 'word': 'tell',\n", " 'start': 54,\n", " 'end': 58},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60980695,\n", " 'index': 17,\n", " 'word': 'me',\n", " 'start': 59,\n", " 'end': 61},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61398715,\n", " 'index': 18,\n", " 'word': 'the',\n", " 'start': 62,\n", " 'end': 65},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7106103,\n", " 'index': 19,\n", " 'word': 'whole',\n", " 'start': 66,\n", " 'end': 71},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6377772,\n", " 'index': 20,\n", " 'word': 'story',\n", " 'start': 72,\n", " 'end': 77},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6342355,\n", " 'index': 21,\n", " 
'word': 'about',\n", " 'start': 78,\n", " 'end': 83},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6174517,\n", " 'index': 22,\n", " 'word': 'the',\n", " 'start': 84,\n", " 'end': 87},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6358006,\n", " 'index': 23,\n", " 'word': 'Face',\n", " 'start': 88,\n", " 'end': 92},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5708014,\n", " 'index': 24,\n", " 'word': 'On',\n", " 'start': 93,\n", " 'end': 95},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6247826,\n", " 'index': 25,\n", " 'word': 'Mars',\n", " 'start': 96,\n", " 'end': 100},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.79095966,\n", " 'index': 26,\n", " 'word': ',',\n", " 'start': 100,\n", " 'end': 101},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.67212445,\n", " 'index': 27,\n", " 'word': 'which',\n", " 'start': 102,\n", " 'end': 107},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7683761,\n", " 'index': 28,\n", " 'word': 'obviously',\n", " 'start': 108,\n", " 'end': 117},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5656431,\n", " 'index': 29,\n", " 'word': 'is',\n", " 'start': 118,\n", " 'end': 120},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5710077,\n", " 'index': 30,\n", " 'word': 'evidence',\n", " 'start': 121,\n", " 'end': 129},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6670097,\n", " 'index': 31,\n", " 'word': 'that',\n", " 'start': 130,\n", " 'end': 134},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6908868,\n", " 'index': 32,\n", " 'word': 'there',\n", " 'start': 135,\n", " 'end': 140},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59239066,\n", " 'index': 33,\n", " 'word': 'is',\n", " 'start': 141,\n", " 'end': 143},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7072231,\n", " 'index': 34,\n", " 'word': 'life',\n", " 'start': 144,\n", " 'end': 148},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64615613,\n", " 'index': 35,\n", " 'word': 'on',\n", " 'start': 149,\n", " 'end': 151},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.60538524,\n", " 'index': 
36,\n", " 'word': 'Mars',\n", " 'start': 152,\n", " 'end': 156},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.83394796,\n", " 'index': 37,\n", " 'word': ',',\n", " 'start': 156,\n", " 'end': 157},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6373182,\n", " 'index': 38,\n", " 'word': 'and',\n", " 'start': 158,\n", " 'end': 161},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.732228,\n", " 'index': 39,\n", " 'word': 'that',\n", " 'start': 162,\n", " 'end': 166},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.55135024,\n", " 'index': 40,\n", " 'word': 'the',\n", " 'start': 167,\n", " 'end': 170},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5254089,\n", " 'index': 41,\n", " 'word': 'face',\n", " 'start': 171,\n", " 'end': 175},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61335075,\n", " 'index': 42,\n", " 'word': 'was',\n", " 'start': 176,\n", " 'end': 179},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74078745,\n", " 'index': 43,\n", " 'word': 'created',\n", " 'start': 180,\n", " 'end': 187},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7517857,\n", " 'index': 44,\n", " 'word': 'by',\n", " 'start': 188,\n", " 'end': 190},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.510251,\n", " 'index': 45,\n", " 'word': 'aliens',\n", " 'start': 191,\n", " 'end': 197},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7680581,\n", " 'index': 46,\n", " 'word': ',',\n", " 'start': 197,\n", " 'end': 198},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5634767,\n", " 'index': 47,\n", " 'word': 'correct',\n", " 'start': 199,\n", " 'end': 206},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66794556,\n", " 'index': 48,\n", " 'word': '?',\n", " 'start': 206,\n", " 'end': 207},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6206324,\n", " 'index': 49,\n", " 'word': '\"',\n", " 'start': 207,\n", " 'end': 208},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69091505,\n", " 'index': 50,\n", " 'word': 'No',\n", " 'start': 209,\n", " 'end': 211},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.82622874,\n", " 
'index': 51,\n", " 'word': ',',\n", " 'start': 211,\n", " 'end': 212},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6986776,\n", " 'index': 52,\n", " 'word': 'twenty',\n", " 'start': 213,\n", " 'end': 219},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.587492,\n", " 'index': 53,\n", " 'word': 'five',\n", " 'start': 220,\n", " 'end': 224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5896044,\n", " 'index': 54,\n", " 'word': 'years',\n", " 'start': 225,\n", " 'end': 230},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5274402,\n", " 'index': 55,\n", " 'word': 'ago',\n", " 'start': 231,\n", " 'end': 234},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65922695,\n", " 'index': 56,\n", " 'word': ',',\n", " 'start': 234,\n", " 'end': 235},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56813127,\n", " 'index': 57,\n", " 'word': 'our',\n", " 'start': 236,\n", " 'end': 239},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58480746,\n", " 'index': 58,\n", " 'word': 'Viking',\n", " 'start': 240,\n", " 'end': 246},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6104148,\n", " 'index': 59,\n", " 'word': '1',\n", " 'start': 247,\n", " 'end': 248},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60260046,\n", " 'index': 60,\n", " 'word': 'spacecraft',\n", " 'start': 249,\n", " 'end': 259},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6534109,\n", " 'index': 61,\n", " 'word': 'was',\n", " 'start': 260,\n", " 'end': 263},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70453274,\n", " 'index': 62,\n", " 'word': 'circling',\n", " 'start': 264,\n", " 'end': 272},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59796387,\n", " 'index': 63,\n", " 'word': 'the',\n", " 'start': 273,\n", " 'end': 276},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57720464,\n", " 'index': 64,\n", " 'word': 'planet',\n", " 'start': 277,\n", " 'end': 283},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.81815565,\n", " 'index': 65,\n", " 'word': ',',\n", " 'start': 283,\n", " 'end': 284},\n", " {'entity': 'LABEL_1',\n", " 
'score': 0.6159785,\n", " 'index': 66,\n", " 'word': 'snapping',\n", " 'start': 285,\n", " 'end': 293},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5178575,\n", " 'index': 67,\n", " 'word': 'photos',\n", " 'start': 294,\n", " 'end': 300},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7781755,\n", " 'index': 68,\n", " 'word': ',',\n", " 'start': 300,\n", " 'end': 301},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58284646,\n", " 'index': 69,\n", " 'word': 'when',\n", " 'start': 302,\n", " 'end': 306},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6206713,\n", " 'index': 70,\n", " 'word': 'it',\n", " 'start': 307,\n", " 'end': 309},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7120494,\n", " 'index': 71,\n", " 'word': 'spotted',\n", " 'start': 310,\n", " 'end': 317},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5560274,\n", " 'index': 72,\n", " 'word': 'the',\n", " 'start': 318,\n", " 'end': 321},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6203119,\n", " 'index': 73,\n", " 'word': 'shadowy',\n", " 'start': 322,\n", " 'end': 329},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5314929,\n", " 'index': 74,\n", " 'word': 'like',\n", " 'start': 330,\n", " 'end': 334},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5537888,\n", " 'index': 75,\n", " 'word': '##ness',\n", " 'start': 334,\n", " 'end': 338},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.605519,\n", " 'index': 76,\n", " 'word': 'of',\n", " 'start': 339,\n", " 'end': 341},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6460082,\n", " 'index': 77,\n", " 'word': 'a',\n", " 'start': 342,\n", " 'end': 343},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5739153,\n", " 'index': 78,\n", " 'word': 'human',\n", " 'start': 344,\n", " 'end': 349},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57964504,\n", " 'index': 79,\n", " 'word': 'face',\n", " 'start': 350,\n", " 'end': 354},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67440754,\n", " 'index': 80,\n", " 'word': '.',\n", " 'start': 354,\n", " 'end': 355},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.5449105,\n", " 'index': 81,\n", " 'word': 'Us',\n", " 'start': 356,\n", " 'end': 358},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5455108,\n", " 'index': 82,\n", " 'word': 'scientists',\n", " 'start': 359,\n", " 'end': 369},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64505374,\n", " 'index': 83,\n", " 'word': 'figured',\n", " 'start': 370,\n", " 'end': 377},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5902839,\n", " 'index': 84,\n", " 'word': 'out',\n", " 'start': 378,\n", " 'end': 381},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65235263,\n", " 'index': 85,\n", " 'word': 'that',\n", " 'start': 382,\n", " 'end': 386},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6934572,\n", " 'index': 86,\n", " 'word': 'it',\n", " 'start': 387,\n", " 'end': 389},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66032475,\n", " 'index': 87,\n", " 'word': 'was',\n", " 'start': 390,\n", " 'end': 393},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57023394,\n", " 'index': 88,\n", " 'word': 'just',\n", " 'start': 394,\n", " 'end': 398},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.60775185,\n", " 'index': 89,\n", " 'word': 'another',\n", " 'start': 399,\n", " 'end': 406},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.70214206,\n", " 'index': 90,\n", " 'word': 'Martian',\n", " 'start': 407,\n", " 'end': 414},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6318522,\n", " 'index': 91,\n", " 'word': 'me',\n", " 'start': 415,\n", " 'end': 417},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6979068,\n", " 'index': 92,\n", " 'word': '##sa',\n", " 'start': 417,\n", " 'end': 419},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7611192,\n", " 'index': 93,\n", " 'word': ',',\n", " 'start': 419,\n", " 'end': 420},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72079974,\n", " 'index': 94,\n", " 'word': 'common',\n", " 'start': 421,\n", " 'end': 427},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6922783,\n", " 'index': 95,\n", " 'word': 'around',\n", " 'start': 428,\n", " 
'end': 434},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5261554,\n", " 'index': 96,\n", " 'word': 'Cy',\n", " 'start': 435,\n", " 'end': 437},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5093499,\n", " 'index': 97,\n", " 'word': '##don',\n", " 'start': 437,\n", " 'end': 440},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5632856,\n", " 'index': 98,\n", " 'word': '##ia',\n", " 'start': 440,\n", " 'end': 442},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7464533,\n", " 'index': 99,\n", " 'word': ',',\n", " 'start': 442,\n", " 'end': 443},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.54192007,\n", " 'index': 100,\n", " 'word': 'only',\n", " 'start': 444,\n", " 'end': 448},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67456824,\n", " 'index': 101,\n", " 'word': 'this',\n", " 'start': 449,\n", " 'end': 453},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6420857,\n", " 'index': 102,\n", " 'word': 'one',\n", " 'start': 454,\n", " 'end': 457},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6792174,\n", " 'index': 103,\n", " 'word': 'had',\n", " 'start': 458,\n", " 'end': 461},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5339916,\n", " 'index': 104,\n", " 'word': 'shadows',\n", " 'start': 462,\n", " 'end': 469},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.643012,\n", " 'index': 105,\n", " 'word': 'that',\n", " 'start': 470,\n", " 'end': 474},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5314313,\n", " 'index': 106,\n", " 'word': 'made',\n", " 'start': 475,\n", " 'end': 479},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59911275,\n", " 'index': 107,\n", " 'word': 'it',\n", " 'start': 480,\n", " 'end': 482},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5045175,\n", " 'index': 108,\n", " 'word': 'look',\n", " 'start': 483,\n", " 'end': 487},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6535474,\n", " 'index': 109,\n", " 'word': 'like',\n", " 'start': 488,\n", " 'end': 492},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5992719,\n", " 'index': 110,\n", " 'word': 'an',\n", " 
'start': 493,\n", " 'end': 495},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56163996,\n", " 'index': 111,\n", " 'word': 'Egypt',\n", " 'start': 496,\n", " 'end': 501},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.53756046,\n", " 'index': 112,\n", " 'word': '##ion',\n", " 'start': 501,\n", " 'end': 504},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.501027,\n", " 'index': 113,\n", " 'word': 'Ph',\n", " 'start': 505,\n", " 'end': 507},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6108546,\n", " 'index': 114,\n", " 'word': '##ara',\n", " 'start': 507,\n", " 'end': 510},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6155596,\n", " 'index': 115,\n", " 'word': '##oh',\n", " 'start': 510,\n", " 'end': 512},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6744254,\n", " 'index': 116,\n", " 'word': '.',\n", " 'start': 512,\n", " 'end': 513},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6499993,\n", " 'index': 117,\n", " 'word': 'Very',\n", " 'start': 514,\n", " 'end': 518},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64182097,\n", " 'index': 118,\n", " 'word': 'few',\n", " 'start': 519,\n", " 'end': 522},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.59322584,\n", " 'index': 119,\n", " 'word': 'days',\n", " 'start': 523,\n", " 'end': 527},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7502963,\n", " 'index': 120,\n", " 'word': 'later',\n", " 'start': 528,\n", " 'end': 533},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.74712145,\n", " 'index': 121,\n", " 'word': ',',\n", " 'start': 533,\n", " 'end': 534},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66655517,\n", " 'index': 122,\n", " 'word': 'we',\n", " 'start': 535,\n", " 'end': 537},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8063372,\n", " 'index': 123,\n", " 'word': 'revealed',\n", " 'start': 538,\n", " 'end': 546},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57793516,\n", " 'index': 124,\n", " 'word': 'the',\n", " 'start': 547,\n", " 'end': 550},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65657383,\n", " 'index': 
125,\n", " 'word': 'image',\n", " 'start': 551,\n", " 'end': 556},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.54032975,\n", " 'index': 126,\n", " 'word': 'for',\n", " 'start': 557,\n", " 'end': 560},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6904917,\n", " 'index': 127,\n", " 'word': 'all',\n", " 'start': 561,\n", " 'end': 564},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5489662,\n", " 'index': 128,\n", " 'word': 'to',\n", " 'start': 565,\n", " 'end': 567},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5115776,\n", " 'index': 129,\n", " 'word': 'see',\n", " 'start': 568,\n", " 'end': 571},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8207864,\n", " 'index': 130,\n", " 'word': ',',\n", " 'start': 571,\n", " 'end': 572},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5277151,\n", " 'index': 131,\n", " 'word': 'and',\n", " 'start': 573,\n", " 'end': 576},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6339588,\n", " 'index': 132,\n", " 'word': 'we',\n", " 'start': 577,\n", " 'end': 579},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5440828,\n", " 'index': 133,\n", " 'word': 'made',\n", " 'start': 580,\n", " 'end': 584},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6284659,\n", " 'index': 134,\n", " 'word': 'sure',\n", " 'start': 585,\n", " 'end': 589},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5280459,\n", " 'index': 135,\n", " 'word': 'to',\n", " 'start': 590,\n", " 'end': 592},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6735471,\n", " 'index': 136,\n", " 'word': 'note',\n", " 'start': 593,\n", " 'end': 597},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6085519,\n", " 'index': 137,\n", " 'word': 'that',\n", " 'start': 598,\n", " 'end': 602},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69036937,\n", " 'index': 138,\n", " 'word': 'it',\n", " 'start': 603,\n", " 'end': 605},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7547675,\n", " 'index': 139,\n", " 'word': 'was',\n", " 'start': 606,\n", " 'end': 609},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.7263249,\n", " 'index': 140,\n", " 'word': 'a',\n", " 'start': 610,\n", " 'end': 611},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7987393,\n", " 'index': 141,\n", " 'word': 'huge',\n", " 'start': 612,\n", " 'end': 616},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5952804,\n", " 'index': 142,\n", " 'word': 'rock',\n", " 'start': 617,\n", " 'end': 621},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56155205,\n", " 'index': 143,\n", " 'word': 'formation',\n", " 'start': 622,\n", " 'end': 631},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6473168,\n", " 'index': 144,\n", " 'word': 'that',\n", " 'start': 632,\n", " 'end': 636},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5825665,\n", " 'index': 145,\n", " 'word': 'just',\n", " 'start': 637,\n", " 'end': 641},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.53029037,\n", " 'index': 146,\n", " 'word': 'resembled',\n", " 'start': 642,\n", " 'end': 651},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6174586,\n", " 'index': 147,\n", " 'word': 'a',\n", " 'start': 652,\n", " 'end': 653},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5836265,\n", " 'index': 148,\n", " 'word': 'human',\n", " 'start': 654,\n", " 'end': 659},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5860484,\n", " 'index': 149,\n", " 'word': 'head',\n", " 'start': 660,\n", " 'end': 664},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51445687,\n", " 'index': 150,\n", " 'word': 'and',\n", " 'start': 665,\n", " 'end': 668},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5693341,\n", " 'index': 151,\n", " 'word': 'face',\n", " 'start': 669,\n", " 'end': 673},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.81818646,\n", " 'index': 152,\n", " 'word': ',',\n", " 'start': 673,\n", " 'end': 674},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6410124,\n", " 'index': 153,\n", " 'word': 'but',\n", " 'start': 675,\n", " 'end': 678},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6700791,\n", " 'index': 154,\n", " 'word': 'all',\n", " 'start': 679,\n", " 'end': 682},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.5005887,\n", " 'index': 155,\n", " 'word': 'of',\n", " 'start': 683,\n", " 'end': 685},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71792203,\n", " 'index': 156,\n", " 'word': 'it',\n", " 'start': 686,\n", " 'end': 688},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7390527,\n", " 'index': 157,\n", " 'word': 'was',\n", " 'start': 689,\n", " 'end': 692},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7305893,\n", " 'index': 158,\n", " 'word': 'formed',\n", " 'start': 693,\n", " 'end': 699},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73789006,\n", " 'index': 159,\n", " 'word': 'by',\n", " 'start': 700,\n", " 'end': 702},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5128689,\n", " 'index': 160,\n", " 'word': 'shadows',\n", " 'start': 703,\n", " 'end': 710},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67443025,\n", " 'index': 161,\n", " 'word': '.',\n", " 'start': 710,\n", " 'end': 711},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7642682,\n", " 'index': 162,\n", " 'word': 'We',\n", " 'start': 712,\n", " 'end': 714},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.658385,\n", " 'index': 163,\n", " 'word': 'only',\n", " 'start': 715,\n", " 'end': 719},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.70881367,\n", " 'index': 164,\n", " 'word': 'announced',\n", " 'start': 720,\n", " 'end': 729},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7176985,\n", " 'index': 165,\n", " 'word': 'it',\n", " 'start': 730,\n", " 'end': 732},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6474057,\n", " 'index': 166,\n", " 'word': 'because',\n", " 'start': 733,\n", " 'end': 740},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6813441,\n", " 'index': 167,\n", " 'word': 'we',\n", " 'start': 741,\n", " 'end': 743},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5333977,\n", " 'index': 168,\n", " 'word': 'thought',\n", " 'start': 744,\n", " 'end': 751},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7404348,\n", " 'index': 169,\n", " 'word': 'it',\n", " 'start': 
752,\n", " 'end': 754},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78600377,\n", " 'index': 170,\n", " 'word': 'would',\n", " 'start': 755,\n", " 'end': 760},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58613163,\n", " 'index': 171,\n", " 'word': 'be',\n", " 'start': 761,\n", " 'end': 763},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58903766,\n", " 'index': 172,\n", " 'word': 'a',\n", " 'start': 764,\n", " 'end': 765},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69468915,\n", " 'index': 173,\n", " 'word': 'good',\n", " 'start': 766,\n", " 'end': 770},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6279253,\n", " 'index': 174,\n", " 'word': 'way',\n", " 'start': 771,\n", " 'end': 774},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68305737,\n", " 'index': 175,\n", " 'word': 'to',\n", " 'start': 775,\n", " 'end': 777},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6546071,\n", " 'index': 176,\n", " 'word': 'engage',\n", " 'start': 778,\n", " 'end': 784},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66487354,\n", " 'index': 177,\n", " 'word': 'the',\n", " 'start': 785,\n", " 'end': 788},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.771617,\n", " 'index': 178,\n", " 'word': 'public',\n", " 'start': 789,\n", " 'end': 795},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5646253,\n", " 'index': 179,\n", " 'word': 'with',\n", " 'start': 796,\n", " 'end': 800},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.55737066,\n", " 'index': 180,\n", " 'word': 'NASA',\n", " 'start': 801,\n", " 'end': 805},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5705753,\n", " 'index': 181,\n", " 'word': \"'\",\n", " 'start': 805,\n", " 'end': 806},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59688944,\n", " 'index': 182,\n", " 'word': 's',\n", " 'start': 806,\n", " 'end': 807},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.563748,\n", " 'index': 183,\n", " 'word': 'findings',\n", " 'start': 808,\n", " 'end': 816},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.83522767,\n", " 'index': 184,\n", " 
'word': ',',\n", " 'start': 816,\n", " 'end': 817},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.663783,\n", " 'index': 185,\n", " 'word': 'and',\n", " 'start': 818,\n", " 'end': 821},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56715816,\n", " 'index': 186,\n", " 'word': 'at',\n", " 'start': 822,\n", " 'end': 824},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6969365,\n", " 'index': 187,\n", " 'word': '##rra',\n", " 'start': 824,\n", " 'end': 827},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6686826,\n", " 'index': 188,\n", " 'word': '##ct',\n", " 'start': 827,\n", " 'end': 829},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6169446,\n", " 'index': 189,\n", " 'word': 'attention',\n", " 'start': 830,\n", " 'end': 839},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72824407,\n", " 'index': 190,\n", " 'word': 'to',\n", " 'start': 840,\n", " 'end': 842},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5518576,\n", " 'index': 191,\n", " 'word': 'Mars',\n", " 'start': 843,\n", " 'end': 847},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6182919,\n", " 'index': 192,\n", " 'word': '-',\n", " 'start': 847,\n", " 'end': 848},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60027426,\n", " 'index': 193,\n", " 'word': '-',\n", " 'start': 848,\n", " 'end': 849},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6326655,\n", " 'index': 194,\n", " 'word': 'and',\n", " 'start': 850,\n", " 'end': 853},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.62892854,\n", " 'index': 195,\n", " 'word': 'it',\n", " 'start': 854,\n", " 'end': 856},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59196657,\n", " 'index': 196,\n", " 'word': 'did',\n", " 'start': 857,\n", " 'end': 860},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67441356,\n", " 'index': 197,\n", " 'word': '.',\n", " 'start': 860,\n", " 'end': 861},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58260876,\n", " 'index': 198,\n", " 'word': 'The',\n", " 'start': 863,\n", " 'end': 866},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61155885,\n", " 
'index': 199,\n", " 'word': 'face',\n", " 'start': 867,\n", " 'end': 871},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5333523,\n", " 'index': 200,\n", " 'word': 'on',\n", " 'start': 872,\n", " 'end': 874},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5905245,\n", " 'index': 201,\n", " 'word': 'Mars',\n", " 'start': 875,\n", " 'end': 879},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5726181,\n", " 'index': 202,\n", " 'word': 'soon',\n", " 'start': 880,\n", " 'end': 884},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57342,\n", " 'index': 203,\n", " 'word': 'became',\n", " 'start': 885,\n", " 'end': 891},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6202053,\n", " 'index': 204,\n", " 'word': 'a',\n", " 'start': 892,\n", " 'end': 893},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5770226,\n", " 'index': 205,\n", " 'word': 'pop',\n", " 'start': 894,\n", " 'end': 897},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67616236,\n", " 'index': 206,\n", " 'word': 'icon',\n", " 'start': 898,\n", " 'end': 902},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6696246,\n", " 'index': 207,\n", " 'word': ';',\n", " 'start': 902,\n", " 'end': 903},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6367805,\n", " 'index': 208,\n", " 'word': 'shot',\n", " 'start': 904,\n", " 'end': 908},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.54797834,\n", " 'index': 209,\n", " 'word': 'in',\n", " 'start': 909,\n", " 'end': 911},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6188789,\n", " 'index': 210,\n", " 'word': 'movies',\n", " 'start': 912,\n", " 'end': 918},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7146302,\n", " 'index': 211,\n", " 'word': ',',\n", " 'start': 918,\n", " 'end': 919},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5945637,\n", " 'index': 212,\n", " 'word': 'appeared',\n", " 'start': 920,\n", " 'end': 928},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6160087,\n", " 'index': 213,\n", " 'word': 'in',\n", " 'start': 929,\n", " 'end': 931},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.66332275,\n", " 'index': 214,\n", " 'word': 'books',\n", " 'start': 932,\n", " 'end': 937},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7474604,\n", " 'index': 215,\n", " 'word': ',',\n", " 'start': 937,\n", " 'end': 938},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6651565,\n", " 'index': 216,\n", " 'word': 'magazines',\n", " 'start': 939,\n", " 'end': 948},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7416081,\n", " 'index': 217,\n", " 'word': ',',\n", " 'start': 948,\n", " 'end': 949},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6236832,\n", " 'index': 218,\n", " 'word': 'radio',\n", " 'start': 950,\n", " 'end': 955},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5897754,\n", " 'index': 219,\n", " 'word': 'talk',\n", " 'start': 956,\n", " 'end': 960},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58853656,\n", " 'index': 220,\n", " 'word': 'shows',\n", " 'start': 961,\n", " 'end': 966},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7553832,\n", " 'index': 221,\n", " 'word': ',',\n", " 'start': 966,\n", " 'end': 967},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.623144,\n", " 'index': 222,\n", " 'word': 'and',\n", " 'start': 968,\n", " 'end': 971},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.50205296,\n", " 'index': 223,\n", " 'word': 'haunted',\n", " 'start': 972,\n", " 'end': 979},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5905772,\n", " 'index': 224,\n", " 'word': 'grocery',\n", " 'start': 980,\n", " 'end': 987},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64581704,\n", " 'index': 225,\n", " 'word': 'store',\n", " 'start': 988,\n", " 'end': 993},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57227784,\n", " 'index': 226,\n", " 'word': 'check',\n", " 'start': 994,\n", " 'end': 999},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.62471133,\n", " 'index': 227,\n", " 'word': '##out',\n", " 'start': 999,\n", " 'end': 1002},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5380063,\n", " 'index': 228,\n", " 'word': 'lines',\n", " 'start': 1003,\n", " 'end': 
1008},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7226769,\n", " 'index': 229,\n", " 'word': 'for',\n", " 'start': 1009,\n", " 'end': 1012},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6841871,\n", " 'index': 230,\n", " 'word': '25',\n", " 'start': 1013,\n", " 'end': 1015},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59963995,\n", " 'index': 231,\n", " 'word': 'years',\n", " 'start': 1016,\n", " 'end': 1021},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67440766,\n", " 'index': 232,\n", " 'word': '.',\n", " 'start': 1021,\n", " 'end': 1022},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51083964,\n", " 'index': 233,\n", " 'word': 'Some',\n", " 'start': 1023,\n", " 'end': 1027},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61685973,\n", " 'index': 234,\n", " 'word': 'people',\n", " 'start': 1028,\n", " 'end': 1034},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72570676,\n", " 'index': 235,\n", " 'word': 'thought',\n", " 'start': 1035,\n", " 'end': 1042},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.65166354,\n", " 'index': 236,\n", " 'word': 'the',\n", " 'start': 1043,\n", " 'end': 1046},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.526593,\n", " 'index': 237,\n", " 'word': 'natural',\n", " 'start': 1047,\n", " 'end': 1054},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5936935,\n", " 'index': 238,\n", " 'word': 'land',\n", " 'start': 1055,\n", " 'end': 1059},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.53759766,\n", " 'index': 239,\n", " 'word': '##form',\n", " 'start': 1059,\n", " 'end': 1063},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68580973,\n", " 'index': 240,\n", " 'word': 'was',\n", " 'start': 1064,\n", " 'end': 1067},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59179384,\n", " 'index': 241,\n", " 'word': 'evidence',\n", " 'start': 1068,\n", " 'end': 1076},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6524172,\n", " 'index': 242,\n", " 'word': 'of',\n", " 'start': 1077,\n", " 'end': 1079},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6464424,\n", " 
'index': 243,\n", " 'word': 'life',\n", " 'start': 1080,\n", " 'end': 1084},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69582665,\n", " 'index': 244,\n", " 'word': 'on',\n", " 'start': 1085,\n", " 'end': 1087},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5751102,\n", " 'index': 245,\n", " 'word': 'Mars',\n", " 'start': 1088,\n", " 'end': 1092},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8460579,\n", " 'index': 246,\n", " 'word': ',',\n", " 'start': 1092,\n", " 'end': 1093},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6263853,\n", " 'index': 247,\n", " 'word': 'and',\n", " 'start': 1094,\n", " 'end': 1097},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6964252,\n", " 'index': 248,\n", " 'word': 'that',\n", " 'start': 1098,\n", " 'end': 1102},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5494476,\n", " 'index': 249,\n", " 'word': 'us',\n", " 'start': 1103,\n", " 'end': 1105},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56226,\n", " 'index': 250,\n", " 'word': 'scientists',\n", " 'start': 1106,\n", " 'end': 1116},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7735633,\n", " 'index': 251,\n", " 'word': 'wanted',\n", " 'start': 1117,\n", " 'end': 1123},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78176767,\n", " 'index': 252,\n", " 'word': 'to',\n", " 'start': 1124,\n", " 'end': 1126},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7204006,\n", " 'index': 253,\n", " 'word': 'hide',\n", " 'start': 1127,\n", " 'end': 1131},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.77582467,\n", " 'index': 254,\n", " 'word': 'it',\n", " 'start': 1132,\n", " 'end': 1134},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.81556284,\n", " 'index': 255,\n", " 'word': ',',\n", " 'start': 1134,\n", " 'end': 1135},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.73906016,\n", " 'index': 256,\n", " 'word': 'but',\n", " 'start': 1136,\n", " 'end': 1139},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.72601235,\n", " 'index': 257,\n", " 'word': 'really',\n", " 'start': 1140,\n", " 'end': 1146},\n", " 
{'entity': 'LABEL_1',\n", " 'score': 0.7567177,\n", " 'index': 258,\n", " 'word': ',',\n", " 'start': 1146,\n", " 'end': 1147},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6613144,\n", " 'index': 259,\n", " 'word': 'the',\n", " 'start': 1148,\n", " 'end': 1151},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5439451,\n", " 'index': 260,\n", " 'word': 'defenders',\n", " 'start': 1152,\n", " 'end': 1161},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52611476,\n", " 'index': 261,\n", " 'word': 'of',\n", " 'start': 1162,\n", " 'end': 1164},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61969113,\n", " 'index': 262,\n", " 'word': 'the',\n", " 'start': 1165,\n", " 'end': 1168},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5504615,\n", " 'index': 263,\n", " 'word': 'NASA',\n", " 'start': 1169,\n", " 'end': 1173},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6306784,\n", " 'index': 264,\n", " 'word': 'budget',\n", " 'start': 1174,\n", " 'end': 1180},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61308634,\n", " 'index': 265,\n", " 'word': 'wish',\n", " 'start': 1181,\n", " 'end': 1185},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68934727,\n", " 'index': 266,\n", " 'word': 'there',\n", " 'start': 1186,\n", " 'end': 1191},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7155088,\n", " 'index': 267,\n", " 'word': 'was',\n", " 'start': 1192,\n", " 'end': 1195},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57382756,\n", " 'index': 268,\n", " 'word': 'ancient',\n", " 'start': 1196,\n", " 'end': 1203},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56942445,\n", " 'index': 269,\n", " 'word': 'civilization',\n", " 'start': 1204,\n", " 'end': 1216},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6762141,\n", " 'index': 270,\n", " 'word': 'on',\n", " 'start': 1217,\n", " 'end': 1219},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5380082,\n", " 'index': 271,\n", " 'word': 'Mars',\n", " 'start': 1220,\n", " 'end': 1224},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67440796,\n", " 'index': 
272,\n", " 'word': '.',\n", " 'start': 1224,\n", " 'end': 1225},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6982536,\n", " 'index': 273,\n", " 'word': 'We',\n", " 'start': 1226,\n", " 'end': 1228},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58452165,\n", " 'index': 274,\n", " 'word': 'decided',\n", " 'start': 1229,\n", " 'end': 1236},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6876171,\n", " 'index': 275,\n", " 'word': 'to',\n", " 'start': 1237,\n", " 'end': 1239},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5167381,\n", " 'index': 276,\n", " 'word': 'take',\n", " 'start': 1240,\n", " 'end': 1244},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5906027,\n", " 'index': 277,\n", " 'word': 'another',\n", " 'start': 1245,\n", " 'end': 1252},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.561436,\n", " 'index': 278,\n", " 'word': 'shot',\n", " 'start': 1253,\n", " 'end': 1257},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57617104,\n", " 'index': 279,\n", " 'word': 'just',\n", " 'start': 1258,\n", " 'end': 1262},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59515345,\n", " 'index': 280,\n", " 'word': 'to',\n", " 'start': 1263,\n", " 'end': 1265},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.585306,\n", " 'index': 281,\n", " 'word': 'make',\n", " 'start': 1266,\n", " 'end': 1270},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.55697155,\n", " 'index': 282,\n", " 'word': 'sure',\n", " 'start': 1271,\n", " 'end': 1275},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6123277,\n", " 'index': 283,\n", " 'word': 'we',\n", " 'start': 1276,\n", " 'end': 1278},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5167826,\n", " 'index': 284,\n", " 'word': 'weren',\n", " 'start': 1279,\n", " 'end': 1284},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.51131904,\n", " 'index': 285,\n", " 'word': \"'\",\n", " 'start': 1284,\n", " 'end': 1285},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5814942,\n", " 'index': 286,\n", " 'word': 't',\n", " 'start': 1285,\n", " 'end': 1286},\n", " {'entity': 
'LABEL_0',\n", " 'score': 0.5043164,\n", " 'index': 287,\n", " 'word': 'wrong',\n", " 'start': 1287,\n", " 'end': 1292},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7838791,\n", " 'index': 288,\n", " 'word': ',',\n", " 'start': 1292,\n", " 'end': 1293},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6923021,\n", " 'index': 289,\n", " 'word': 'on',\n", " 'start': 1294,\n", " 'end': 1296},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5097389,\n", " 'index': 290,\n", " 'word': 'April',\n", " 'start': 1297,\n", " 'end': 1302},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5005761,\n", " 'index': 291,\n", " 'word': '5',\n", " 'start': 1303,\n", " 'end': 1304},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64288056,\n", " 'index': 292,\n", " 'word': ',',\n", " 'start': 1304,\n", " 'end': 1305},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.53319114,\n", " 'index': 293,\n", " 'word': '1998',\n", " 'start': 1306,\n", " 'end': 1310},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57352495,\n", " 'index': 294,\n", " 'word': '.',\n", " 'start': 1310,\n", " 'end': 1311},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6711837,\n", " 'index': 295,\n", " 'word': 'Michael',\n", " 'start': 1312,\n", " 'end': 1319},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5190688,\n", " 'index': 296,\n", " 'word': 'Mali',\n", " 'start': 1320,\n", " 'end': 1324},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.547607,\n", " 'index': 297,\n", " 'word': '##n',\n", " 'start': 1324,\n", " 'end': 1325},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5587775,\n", " 'index': 298,\n", " 'word': 'and',\n", " 'start': 1326,\n", " 'end': 1329},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.53947425,\n", " 'index': 299,\n", " 'word': 'his',\n", " 'start': 1330,\n", " 'end': 1333},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.57580537,\n", " 'index': 300,\n", " 'word': 'Mars',\n", " 'start': 1334,\n", " 'end': 1338},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.65286255,\n", " 'index': 301,\n", " 'word': 'Or',\n", " 
'start': 1339,\n", " 'end': 1341},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6625232,\n", " 'index': 302,\n", " 'word': '##bit',\n", " 'start': 1341,\n", " 'end': 1344},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.58811414,\n", " 'index': 303,\n", " 'word': '##er',\n", " 'start': 1344,\n", " 'end': 1346},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56405336,\n", " 'index': 304,\n", " 'word': 'camera',\n", " 'start': 1347,\n", " 'end': 1353},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52700096,\n", " 'index': 305,\n", " 'word': 'team',\n", " 'start': 1354,\n", " 'end': 1358},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.53318214,\n", " 'index': 306,\n", " 'word': 'took',\n", " 'start': 1359,\n", " 'end': 1363},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.535714,\n", " 'index': 307,\n", " 'word': 'a',\n", " 'start': 1364,\n", " 'end': 1365},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.55766904,\n", " 'index': 308,\n", " 'word': 'picture',\n", " 'start': 1366,\n", " 'end': 1373},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.67556494,\n", " 'index': 309,\n", " 'word': 'that',\n", " 'start': 1374,\n", " 'end': 1378},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5773598,\n", " 'index': 310,\n", " 'word': 'was',\n", " 'start': 1379,\n", " 'end': 1382},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7880312,\n", " 'index': 311,\n", " 'word': 'ten',\n", " 'start': 1383,\n", " 'end': 1386},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6340814,\n", " 'index': 312,\n", " 'word': 'times',\n", " 'start': 1387,\n", " 'end': 1392},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.57962173,\n", " 'index': 313,\n", " 'word': 'sharp',\n", " 'start': 1393,\n", " 'end': 1398},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5738646,\n", " 'index': 314,\n", " 'word': '##er',\n", " 'start': 1398,\n", " 'end': 1400},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5188506,\n", " 'index': 315,\n", " 'word': 'than',\n", " 'start': 1401,\n", " 'end': 1405},\n", " {'entity': 'LABEL_0',\n", " 
'score': 0.5666434,\n", " 'index': 316,\n", " 'word': 'the',\n", " 'start': 1406,\n", " 'end': 1409},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56921905,\n", " 'index': 317,\n", " 'word': 'original',\n", " 'start': 1410,\n", " 'end': 1418},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6608325,\n", " 'index': 318,\n", " 'word': 'Viking',\n", " 'start': 1419,\n", " 'end': 1425},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5899687,\n", " 'index': 319,\n", " 'word': 'photos',\n", " 'start': 1426,\n", " 'end': 1432},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.83449113,\n", " 'index': 320,\n", " 'word': ',',\n", " 'start': 1432,\n", " 'end': 1433},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.80289483,\n", " 'index': 321,\n", " 'word': 'revealing',\n", " 'start': 1434,\n", " 'end': 1443},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.67294866,\n", " 'index': 322,\n", " 'word': 'a',\n", " 'start': 1444,\n", " 'end': 1445},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5613308,\n", " 'index': 323,\n", " 'word': 'natural',\n", " 'start': 1446,\n", " 'end': 1453},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.505271,\n", " 'index': 324,\n", " 'word': 'land',\n", " 'start': 1454,\n", " 'end': 1458},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6070904,\n", " 'index': 325,\n", " 'word': '##form',\n", " 'start': 1458,\n", " 'end': 1462},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7992068,\n", " 'index': 326,\n", " 'word': ',',\n", " 'start': 1462,\n", " 'end': 1463},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6349267,\n", " 'index': 327,\n", " 'word': 'which',\n", " 'start': 1464,\n", " 'end': 1469},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6860331,\n", " 'index': 328,\n", " 'word': 'meant',\n", " 'start': 1470,\n", " 'end': 1475},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.540704,\n", " 'index': 329,\n", " 'word': 'no',\n", " 'start': 1476,\n", " 'end': 1478},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.62928075,\n", " 'index': 330,\n", " 'word': 'alien',\n", " 
'start': 1479,\n", " 'end': 1484},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5880116,\n", " 'index': 331,\n", " 'word': 'monument',\n", " 'start': 1485,\n", " 'end': 1493},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6744335,\n", " 'index': 332,\n", " 'word': '.',\n", " 'start': 1493,\n", " 'end': 1494},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56081796,\n", " 'index': 333,\n", " 'word': '\"',\n", " 'start': 1495,\n", " 'end': 1496},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51550716,\n", " 'index': 334,\n", " 'word': 'But',\n", " 'start': 1496,\n", " 'end': 1499},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69971544,\n", " 'index': 335,\n", " 'word': 'that',\n", " 'start': 1500,\n", " 'end': 1504},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.59912956,\n", " 'index': 336,\n", " 'word': 'picture',\n", " 'start': 1505,\n", " 'end': 1512},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6235601,\n", " 'index': 337,\n", " 'word': 'wasn',\n", " 'start': 1513,\n", " 'end': 1517},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5095923,\n", " 'index': 338,\n", " 'word': \"'\",\n", " 'start': 1517,\n", " 'end': 1518},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6090436,\n", " 'index': 339,\n", " 'word': 't',\n", " 'start': 1518,\n", " 'end': 1519},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.78969496,\n", " 'index': 340,\n", " 'word': 'very',\n", " 'start': 1520,\n", " 'end': 1524},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7117616,\n", " 'index': 341,\n", " 'word': 'clear',\n", " 'start': 1525,\n", " 'end': 1530},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6051015,\n", " 'index': 342,\n", " 'word': 'at',\n", " 'start': 1531,\n", " 'end': 1533},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5591377,\n", " 'index': 343,\n", " 'word': 'all',\n", " 'start': 1534,\n", " 'end': 1537},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8091911,\n", " 'index': 344,\n", " 'word': ',',\n", " 'start': 1537,\n", " 'end': 1538},\n", " {'entity': 'LABEL_1',\n", " 'score': 
0.6422674,\n", " 'index': 345,\n", " 'word': 'which',\n", " 'start': 1539,\n", " 'end': 1544},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60172576,\n", " 'index': 346,\n", " 'word': 'could',\n", " 'start': 1545,\n", " 'end': 1550},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60047704,\n", " 'index': 347,\n", " 'word': 'mean',\n", " 'start': 1551,\n", " 'end': 1555},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.56904197,\n", " 'index': 348,\n", " 'word': 'alien',\n", " 'start': 1556,\n", " 'end': 1561},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.62595856,\n", " 'index': 349,\n", " 'word': 'markings',\n", " 'start': 1562,\n", " 'end': 1570},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6299875,\n", " 'index': 350,\n", " 'word': 'were',\n", " 'start': 1571,\n", " 'end': 1575},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6949092,\n", " 'index': 351,\n", " 'word': 'hidden',\n", " 'start': 1576,\n", " 'end': 1582},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6191803,\n", " 'index': 352,\n", " 'word': 'by',\n", " 'start': 1583,\n", " 'end': 1585},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5347818,\n", " 'index': 353,\n", " 'word': 'haze',\n", " 'start': 1586,\n", " 'end': 1590},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5951616,\n", " 'index': 354,\n", " 'word': '\"',\n", " 'start': 1590,\n", " 'end': 1591},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5990277,\n", " 'index': 355,\n", " 'word': 'Well',\n", " 'start': 1592,\n", " 'end': 1596},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68367994,\n", " 'index': 356,\n", " 'word': 'no',\n", " 'start': 1597,\n", " 'end': 1599},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.780734,\n", " 'index': 357,\n", " 'word': ',',\n", " 'start': 1599,\n", " 'end': 1600},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51023954,\n", " 'index': 358,\n", " 'word': 'yes',\n", " 'start': 1601,\n", " 'end': 1604},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6934846,\n", " 'index': 359,\n", " 'word': 'that',\n", " 'start': 1605,\n", 
" 'end': 1609},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.62337947,\n", " 'index': 360,\n", " 'word': 'rumor',\n", " 'start': 1610,\n", " 'end': 1615},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.69536877,\n", " 'index': 361,\n", " 'word': 'started',\n", " 'start': 1616,\n", " 'end': 1623},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.8160849,\n", " 'index': 362,\n", " 'word': ',',\n", " 'start': 1623,\n", " 'end': 1624},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5922601,\n", " 'index': 363,\n", " 'word': 'but',\n", " 'start': 1625,\n", " 'end': 1628},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64855474,\n", " 'index': 364,\n", " 'word': 'to',\n", " 'start': 1629,\n", " 'end': 1631},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5228099,\n", " 'index': 365,\n", " 'word': 'prove',\n", " 'start': 1632,\n", " 'end': 1637},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6812592,\n", " 'index': 366,\n", " 'word': 'them',\n", " 'start': 1638,\n", " 'end': 1642},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64243567,\n", " 'index': 367,\n", " 'word': 'wrong',\n", " 'start': 1643,\n", " 'end': 1648},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6727351,\n", " 'index': 368,\n", " 'word': 'on',\n", " 'start': 1649,\n", " 'end': 1651},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6589173,\n", " 'index': 369,\n", " 'word': 'April',\n", " 'start': 1652,\n", " 'end': 1657},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6754752,\n", " 'index': 370,\n", " 'word': '8',\n", " 'start': 1658,\n", " 'end': 1659},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.60759,\n", " 'index': 371,\n", " 'word': ',',\n", " 'start': 1659,\n", " 'end': 1660},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6044962,\n", " 'index': 372,\n", " 'word': '2001',\n", " 'start': 1661,\n", " 'end': 1665},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6850097,\n", " 'index': 373,\n", " 'word': 'we',\n", " 'start': 1666,\n", " 'end': 1668},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5219355,\n", " 'index': 
374,\n", " 'word': 'decided',\n", " 'start': 1669,\n", " 'end': 1676},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6707725,\n", " 'index': 375,\n", " 'word': 'to',\n", " 'start': 1677,\n", " 'end': 1679},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.51351875,\n", " 'index': 376,\n", " 'word': 'take',\n", " 'start': 1680,\n", " 'end': 1684},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5230102,\n", " 'index': 377,\n", " 'word': 'another',\n", " 'start': 1685,\n", " 'end': 1692},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5453028,\n", " 'index': 378,\n", " 'word': 'picture',\n", " 'start': 1693,\n", " 'end': 1700},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7876373,\n", " 'index': 379,\n", " 'word': ',',\n", " 'start': 1700,\n", " 'end': 1701},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5465298,\n", " 'index': 380,\n", " 'word': 'making',\n", " 'start': 1702,\n", " 'end': 1708},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.63919556,\n", " 'index': 381,\n", " 'word': 'sure',\n", " 'start': 1709,\n", " 'end': 1713},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5663544,\n", " 'index': 382,\n", " 'word': 'it',\n", " 'start': 1714,\n", " 'end': 1716},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.604365,\n", " 'index': 383,\n", " 'word': 'was',\n", " 'start': 1717,\n", " 'end': 1720},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.66089803,\n", " 'index': 384,\n", " 'word': 'a',\n", " 'start': 1721,\n", " 'end': 1722},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6593189,\n", " 'index': 385,\n", " 'word': 'cloud',\n", " 'start': 1723,\n", " 'end': 1728},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58361953,\n", " 'index': 386,\n", " 'word': '##less',\n", " 'start': 1728,\n", " 'end': 1732},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7110155,\n", " 'index': 387,\n", " 'word': 'summer',\n", " 'start': 1733,\n", " 'end': 1739},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5355839,\n", " 'index': 388,\n", " 'word': 'day',\n", " 'start': 1740,\n", " 'end': 1743},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.6743491,\n", " 'index': 389,\n", " 'word': '.',\n", " 'start': 1743,\n", " 'end': 1744},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5186097,\n", " 'index': 390,\n", " 'word': 'Mali',\n", " 'start': 1745,\n", " 'end': 1749},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6076603,\n", " 'index': 391,\n", " 'word': '##n',\n", " 'start': 1749,\n", " 'end': 1750},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5730305,\n", " 'index': 392,\n", " 'word': \"'\",\n", " 'start': 1750,\n", " 'end': 1751},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.55874217,\n", " 'index': 393,\n", " 'word': 's',\n", " 'start': 1751,\n", " 'end': 1752},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5618185,\n", " 'index': 394,\n", " 'word': 'team',\n", " 'start': 1753,\n", " 'end': 1757},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5198947,\n", " 'index': 395,\n", " 'word': 'captured',\n", " 'start': 1758,\n", " 'end': 1766},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.53001165,\n", " 'index': 396,\n", " 'word': 'an',\n", " 'start': 1767,\n", " 'end': 1769},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6115317,\n", " 'index': 397,\n", " 'word': 'amazing',\n", " 'start': 1770,\n", " 'end': 1777},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61094236,\n", " 'index': 398,\n", " 'word': 'photo',\n", " 'start': 1778,\n", " 'end': 1783},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6055951,\n", " 'index': 399,\n", " 'word': 'using',\n", " 'start': 1784,\n", " 'end': 1789},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6140764,\n", " 'index': 400,\n", " 'word': 'the',\n", " 'start': 1790,\n", " 'end': 1793},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6442308,\n", " 'index': 401,\n", " 'word': 'camera',\n", " 'start': 1794,\n", " 'end': 1800},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.63841516,\n", " 'index': 402,\n", " 'word': \"'\",\n", " 'start': 1800,\n", " 'end': 1801},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5404359,\n", " 'index': 403,\n", " 
'word': 's',\n", " 'start': 1801,\n", " 'end': 1802},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5500208,\n", " 'index': 404,\n", " 'word': 'absolute',\n", " 'start': 1803,\n", " 'end': 1811},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52286357,\n", " 'index': 405,\n", " 'word': 'maximum',\n", " 'start': 1812,\n", " 'end': 1819},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.51756054,\n", " 'index': 406,\n", " 'word': 'revolution',\n", " 'start': 1820,\n", " 'end': 1830},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.6743314,\n", " 'index': 407,\n", " 'word': '.',\n", " 'start': 1830,\n", " 'end': 1831},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5196729,\n", " 'index': 408,\n", " 'word': 'With',\n", " 'start': 1832,\n", " 'end': 1836},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.542713,\n", " 'index': 409,\n", " 'word': 'this',\n", " 'start': 1837,\n", " 'end': 1841},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61549795,\n", " 'index': 410,\n", " 'word': 'camera',\n", " 'start': 1842,\n", " 'end': 1848},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6854319,\n", " 'index': 411,\n", " 'word': 'you',\n", " 'start': 1849,\n", " 'end': 1852},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.54689115,\n", " 'index': 412,\n", " 'word': 'can',\n", " 'start': 1853,\n", " 'end': 1856},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51496017,\n", " 'index': 413,\n", " 'word': 'disc',\n", " 'start': 1857,\n", " 'end': 1861},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.51617885,\n", " 'index': 414,\n", " 'word': '##ern',\n", " 'start': 1861,\n", " 'end': 1864},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5568011,\n", " 'index': 415,\n", " 'word': 'things',\n", " 'start': 1865,\n", " 'end': 1871},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.62646383,\n", " 'index': 416,\n", " 'word': 'in',\n", " 'start': 1872,\n", " 'end': 1874},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5608486,\n", " 'index': 417,\n", " 'word': 'a',\n", " 'start': 1875,\n", " 'end': 1876},\n", " 
{'entity': 'LABEL_0',\n", " 'score': 0.50235623,\n", " 'index': 418,\n", " 'word': 'digital',\n", " 'start': 1877,\n", " 'end': 1884},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5582305,\n", " 'index': 419,\n", " 'word': 'image',\n", " 'start': 1885,\n", " 'end': 1890},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7322742,\n", " 'index': 420,\n", " 'word': ',',\n", " 'start': 1890,\n", " 'end': 1891},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6216431,\n", " 'index': 421,\n", " 'word': '3',\n", " 'start': 1892,\n", " 'end': 1893},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6138383,\n", " 'index': 422,\n", " 'word': 'times',\n", " 'start': 1894,\n", " 'end': 1899},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.61178154,\n", " 'index': 423,\n", " 'word': 'bigger',\n", " 'start': 1900,\n", " 'end': 1906},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51271856,\n", " 'index': 424,\n", " 'word': 'than',\n", " 'start': 1907,\n", " 'end': 1911},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5323224,\n", " 'index': 425,\n", " 'word': 'the',\n", " 'start': 1912,\n", " 'end': 1915},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5431272,\n", " 'index': 426,\n", " 'word': 'p',\n", " 'start': 1916,\n", " 'end': 1917},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.50044477,\n", " 'index': 427,\n", " 'word': '##ixel',\n", " 'start': 1917,\n", " 'end': 1921},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.55557716,\n", " 'index': 428,\n", " 'word': 'size',\n", " 'start': 1922,\n", " 'end': 1926},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.616753,\n", " 'index': 429,\n", " 'word': 'which',\n", " 'start': 1927,\n", " 'end': 1932},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.71363014,\n", " 'index': 430,\n", " 'word': 'means',\n", " 'start': 1933,\n", " 'end': 1938},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5364858,\n", " 'index': 431,\n", " 'word': 'if',\n", " 'start': 1939,\n", " 'end': 1941},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5746519,\n", " 'index': 432,\n", " 
'word': 'there',\n", " 'start': 1942,\n", " 'end': 1947},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5798605,\n", " 'index': 433,\n", " 'word': 'were',\n", " 'start': 1948,\n", " 'end': 1952},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.7288978,\n", " 'index': 434,\n", " 'word': 'any',\n", " 'start': 1953,\n", " 'end': 1956},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.55390185,\n", " 'index': 435,\n", " 'word': 'signs',\n", " 'start': 1957,\n", " 'end': 1962},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6790573,\n", " 'index': 436,\n", " 'word': 'of',\n", " 'start': 1963,\n", " 'end': 1965},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6747198,\n", " 'index': 437,\n", " 'word': 'life',\n", " 'start': 1966,\n", " 'end': 1970},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.76907825,\n", " 'index': 438,\n", " 'word': ',',\n", " 'start': 1970,\n", " 'end': 1971},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68350637,\n", " 'index': 439,\n", " 'word': 'you',\n", " 'start': 1972,\n", " 'end': 1975},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.50333244,\n", " 'index': 440,\n", " 'word': 'could',\n", " 'start': 1976,\n", " 'end': 1981},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.51758593,\n", " 'index': 441,\n", " 'word': 'easily',\n", " 'start': 1982,\n", " 'end': 1988},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6057637,\n", " 'index': 442,\n", " 'word': 'see',\n", " 'start': 1989,\n", " 'end': 1992},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52759,\n", " 'index': 443,\n", " 'word': 'what',\n", " 'start': 1993,\n", " 'end': 1997},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.53059214,\n", " 'index': 444,\n", " 'word': 'they',\n", " 'start': 1998,\n", " 'end': 2002},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6364522,\n", " 'index': 445,\n", " 'word': 'were',\n", " 'start': 2003,\n", " 'end': 2007},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67442816,\n", " 'index': 446,\n", " 'word': '.',\n", " 'start': 2007,\n", " 'end': 2008},\n", " {'entity': 
'LABEL_1',\n", " 'score': 0.5476038,\n", " 'index': 447,\n", " 'word': 'What',\n", " 'start': 2009,\n", " 'end': 2013},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5251065,\n", " 'index': 448,\n", " 'word': 'the',\n", " 'start': 2014,\n", " 'end': 2017},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.52539897,\n", " 'index': 449,\n", " 'word': 'picture',\n", " 'start': 2018,\n", " 'end': 2025},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7622849,\n", " 'index': 450,\n", " 'word': 'showed',\n", " 'start': 2026,\n", " 'end': 2032},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7453803,\n", " 'index': 451,\n", " 'word': 'was',\n", " 'start': 2033,\n", " 'end': 2036},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6480745,\n", " 'index': 452,\n", " 'word': 'the',\n", " 'start': 2037,\n", " 'end': 2040},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5418471,\n", " 'index': 453,\n", " 'word': 'butt',\n", " 'start': 2041,\n", " 'end': 2045},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5352597,\n", " 'index': 454,\n", " 'word': '##e',\n", " 'start': 2045,\n", " 'end': 2046},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.64424896,\n", " 'index': 455,\n", " 'word': 'or',\n", " 'start': 2047,\n", " 'end': 2049},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7269686,\n", " 'index': 456,\n", " 'word': 'me',\n", " 'start': 2050,\n", " 'end': 2052},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68219054,\n", " 'index': 457,\n", " 'word': '##sa',\n", " 'start': 2052,\n", " 'end': 2054},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.7789693,\n", " 'index': 458,\n", " 'word': ',',\n", " 'start': 2054,\n", " 'end': 2055},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.571613,\n", " 'index': 459,\n", " 'word': 'which',\n", " 'start': 2056,\n", " 'end': 2061},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5934434,\n", " 'index': 460,\n", " 'word': 'are',\n", " 'start': 2062,\n", " 'end': 2065},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58099824,\n", " 'index': 461,\n", " 'word': 'land',\n", " 
'start': 2066,\n", " 'end': 2070},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5988671,\n", " 'index': 462,\n", " 'word': '##form',\n", " 'start': 2070,\n", " 'end': 2074},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.58407044,\n", " 'index': 463,\n", " 'word': '##s',\n", " 'start': 2074,\n", " 'end': 2075},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.68731594,\n", " 'index': 464,\n", " 'word': 'common',\n", " 'start': 2076,\n", " 'end': 2082},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.6056752,\n", " 'index': 465,\n", " 'word': 'around',\n", " 'start': 2083,\n", " 'end': 2089},\n", " {'entity': 'LABEL_1',\n", " 'score': 0.5920002,\n", " 'index': 466,\n", " 'word': 'the',\n", " 'start': 2090,\n", " 'end': 2093},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.53270173,\n", " 'index': 467,\n", " 'word': 'American',\n", " 'start': 2094,\n", " 'end': 2102},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.5303487,\n", " 'index': 468,\n", " 'word': 'West',\n", " 'start': 2103,\n", " 'end': 2107},\n", " {'entity': 'LABEL_0',\n", " 'score': 0.67443454,\n", " 'index': 469,\n", " 'word': '.',\n", " 'start': 2107,\n", " 'end': 2108}]" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline\n", "\n", "# Load the tokenizer and the token-classification model from the same\n", "# checkpoint and wrap both in a \"ner\" pipeline.\n", "# NOTE(review): the recorded output only shows the generic LABEL_0/LABEL_1\n", "# tags with low scores (roughly 0.5-0.8), which suggests the classification\n", "# head of this checkpoint is not fine-tuned for NER -- confirm before\n", "# interpreting these predictions.\n", "tokenizer = AutoTokenizer.from_pretrained(\"google-bert/bert-large-cased-whole-word-masking\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"google-bert/bert-large-cased-whole-word-masking\")\n", "nlp = pipeline(task=\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "# Tag the `text` defined earlier in the notebook; the bare last expression\n", "# displays the list of per-token predictions.\n", "ner_results = nlp(text)\n", "ner_results\n" ] }, { "cell_type": "markdown", "id": "33fb7228-f9d1-49a1-83a2-3051a9a490cf", "metadata": {}, "source": [ "## 54 PlanTL-GOB-ES/es_anonimization_core_lg" ] }, { "cell_type": "code", "execution_count": 114, "id": "4bff7320-9f0f-4bd8-bf10-902f1c0c9100", "metadata": {}, "outputs": [ { "name": "stderr",
"output_type": "stream", "text": [ "C:\\Users\\NW\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "ename": "OSError", "evalue": "PlanTL-GOB-ES/es_anonimization_core_lg does not appear to have a file named config.json. Checkout 'https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/tree/main' for available files.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 304\u001b[0m response\u001b[38;5;241m.\u001b[39mraise_for_status()\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\requests\\models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n", "\u001b[1;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/resolve/main/config.json", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)", "File 
\u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:399\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 398\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[1;32m--> 399\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m hf_hub_download(\n\u001b[0;32m 400\u001b[0m path_or_repo_id,\n\u001b[0;32m 401\u001b[0m filename,\n\u001b[0;32m 402\u001b[0m subfolder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(subfolder) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m subfolder,\n\u001b[0;32m 403\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 404\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 405\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 406\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 407\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 408\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 409\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 410\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 411\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 412\u001b[0m )\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in 
\u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1221\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[1;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, legacy_cache_layout, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[0;32m 1220\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _hf_hub_download_to_cache_dir(\n\u001b[0;32m 1222\u001b[0m \u001b[38;5;66;03m# Destination\u001b[39;00m\n\u001b[0;32m 1223\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 1224\u001b[0m \u001b[38;5;66;03m# File info\u001b[39;00m\n\u001b[0;32m 1225\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1226\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1227\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1228\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1229\u001b[0m \u001b[38;5;66;03m# HTTP info\u001b[39;00m\n\u001b[0;32m 1230\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1231\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1232\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1233\u001b[0m 
endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1234\u001b[0m \u001b[38;5;66;03m# Additional options\u001b[39;00m\n\u001b[0;32m 1235\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1236\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 1237\u001b[0m )\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1282\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[1;34m(cache_dir, repo_id, filename, repo_type, revision, headers, proxies, etag_timeout, endpoint, local_files_only, force_download)\u001b[0m\n\u001b[0;32m 1280\u001b[0m \u001b[38;5;66;03m# Try to get metadata (etag, commit_hash, url, size) from the server.\u001b[39;00m\n\u001b[0;32m 1281\u001b[0m \u001b[38;5;66;03m# If we can't, a HEAD request error is returned.\u001b[39;00m\n\u001b[1;32m-> 1282\u001b[0m (url_to_download, etag, commit_hash, expected_size, head_call_error) \u001b[38;5;241m=\u001b[39m _get_metadata_or_catch_error(\n\u001b[0;32m 1283\u001b[0m repo_id\u001b[38;5;241m=\u001b[39mrepo_id,\n\u001b[0;32m 1284\u001b[0m filename\u001b[38;5;241m=\u001b[39mfilename,\n\u001b[0;32m 1285\u001b[0m repo_type\u001b[38;5;241m=\u001b[39mrepo_type,\n\u001b[0;32m 1286\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 1287\u001b[0m endpoint\u001b[38;5;241m=\u001b[39mendpoint,\n\u001b[0;32m 1288\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1289\u001b[0m etag_timeout\u001b[38;5;241m=\u001b[39metag_timeout,\n\u001b[0;32m 1290\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1291\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 1292\u001b[0m storage_folder\u001b[38;5;241m=\u001b[39mstorage_folder,\n\u001b[0;32m 1293\u001b[0m relative_filename\u001b[38;5;241m=\u001b[39mrelative_filename,\n\u001b[0;32m 1294\u001b[0m )\n\u001b[0;32m 1296\u001b[0m \u001b[38;5;66;03m# etag can be None for several 
reasons:\u001b[39;00m\n\u001b[0;32m 1297\u001b[0m \u001b[38;5;66;03m# 1. we passed local_files_only.\u001b[39;00m\n\u001b[0;32m 1298\u001b[0m \u001b[38;5;66;03m# 2. we don't have a connection\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[38;5;66;03m# If the specified revision is a commit hash, look inside \"snapshots\".\u001b[39;00m\n\u001b[0;32m 1305\u001b[0m \u001b[38;5;66;03m# If the specified revision is a branch or tag, look inside \"refs\".\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1722\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[1;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[0;32m 1721\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1722\u001b[0m metadata \u001b[38;5;241m=\u001b[39m get_hf_file_metadata(url\u001b[38;5;241m=\u001b[39murl, proxies\u001b[38;5;241m=\u001b[39mproxies, timeout\u001b[38;5;241m=\u001b[39metag_timeout, headers\u001b[38;5;241m=\u001b[39mheaders)\n\u001b[0;32m 1723\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:1645\u001b[0m, 
in \u001b[0;36mget_hf_file_metadata\u001b[1;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[0;32m 1644\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[1;32m-> 1645\u001b[0m r \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 1646\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHEAD\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 1647\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 1648\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m 1649\u001b[0m allow_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 1650\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 1651\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 1652\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[0;32m 1653\u001b[0m )\n\u001b[0;32m 1654\u001b[0m hf_raise_for_status(r)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:372\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[1;32m--> 372\u001b[0m response \u001b[38;5;241m=\u001b[39m _request_wrapper(\n\u001b[0;32m 373\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[0;32m 374\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m 375\u001b[0m follow_relative_redirects\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 376\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams,\n\u001b[0;32m 377\u001b[0m )\n\u001b[0;32m 379\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[0;32m 380\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n", "File 
\u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:396\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[1;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[0;32m 395\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[1;32m--> 396\u001b[0m hf_raise_for_status(response)\n\u001b[0;32m 397\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\huggingface_hub\\utils\\_errors.py:315\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[1;34m(response, endpoint_name)\u001b[0m\n\u001b[0;32m 314\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 315\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EntryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "\u001b[1;31mEntryNotFoundError\u001b[0m: 404 Client Error. 
(Request ID: Root=1-6689e097-39a33c1619221c92312ec259;76a7f0ec-2b46-42ee-9607-ec1424ceed75)\n\nEntry Not Found for url: https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/resolve/main/config.json.", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[114], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForTokenClassification\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pipeline\n\u001b[1;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlanTL-GOB-ES/es_anonimization_core_lg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForTokenClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlanTL-GOB-ES/es_anonimization_core_lg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 5\u001b[0m nlp \u001b[38;5;241m=\u001b[39m pipeline(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mner\u001b[39m\u001b[38;5;124m\"\u001b[39m, model\u001b[38;5;241m=\u001b[39mmodel, tokenizer\u001b[38;5;241m=\u001b[39mtokenizer)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 835\u001b[0m config \u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfor_model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_dict)\n\u001b[0;32m 836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 837\u001b[0m config 
\u001b[38;5;241m=\u001b[39m AutoConfig\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 838\u001b[0m pretrained_model_name_or_path, trust_remote_code\u001b[38;5;241m=\u001b[39mtrust_remote_code, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 839\u001b[0m )\n\u001b[0;32m 840\u001b[0m config_tokenizer_class \u001b[38;5;241m=\u001b[39m config\u001b[38;5;241m.\u001b[39mtokenizer_class\n\u001b[0;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(config, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoTokenizer\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config\u001b[38;5;241m.\u001b[39mauto_map:\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:934\u001b[0m, in \u001b[0;36mAutoConfig.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 931\u001b[0m trust_remote_code \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrust_remote_code\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 932\u001b[0m code_revision \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcode_revision\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m--> 934\u001b[0m config_dict, unused_kwargs \u001b[38;5;241m=\u001b[39m PretrainedConfig\u001b[38;5;241m.\u001b[39mget_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 935\u001b[0m has_remote_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict 
\u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAutoConfig\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauto_map\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 936\u001b[0m has_local_code \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict \u001b[38;5;129;01mand\u001b[39;00m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;129;01min\u001b[39;00m CONFIG_MAPPING\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:632\u001b[0m, in \u001b[0;36mPretrainedConfig.get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 630\u001b[0m original_kwargs \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(kwargs)\n\u001b[0;32m 631\u001b[0m \u001b[38;5;66;03m# Get config dict associated with the base config file\u001b[39;00m\n\u001b[1;32m--> 632\u001b[0m config_dict, kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_config_dict(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m config_dict:\n\u001b[0;32m 634\u001b[0m original_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m config_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\configuration_utils.py:689\u001b[0m, in 
\u001b[0;36mPretrainedConfig._get_config_dict\u001b[1;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[0;32m 685\u001b[0m configuration_file \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_configuration_file\u001b[39m\u001b[38;5;124m\"\u001b[39m, CONFIG_NAME) \u001b[38;5;28;01mif\u001b[39;00m gguf_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m gguf_file\n\u001b[0;32m 687\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 688\u001b[0m \u001b[38;5;66;03m# Load from local folder or from cache or download from model Hub and cache\u001b[39;00m\n\u001b[1;32m--> 689\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m cached_file(\n\u001b[0;32m 690\u001b[0m pretrained_model_name_or_path,\n\u001b[0;32m 691\u001b[0m configuration_file,\n\u001b[0;32m 692\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m 693\u001b[0m force_download\u001b[38;5;241m=\u001b[39mforce_download,\n\u001b[0;32m 694\u001b[0m proxies\u001b[38;5;241m=\u001b[39mproxies,\n\u001b[0;32m 695\u001b[0m resume_download\u001b[38;5;241m=\u001b[39mresume_download,\n\u001b[0;32m 696\u001b[0m local_files_only\u001b[38;5;241m=\u001b[39mlocal_files_only,\n\u001b[0;32m 697\u001b[0m token\u001b[38;5;241m=\u001b[39mtoken,\n\u001b[0;32m 698\u001b[0m user_agent\u001b[38;5;241m=\u001b[39muser_agent,\n\u001b[0;32m 699\u001b[0m revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m 700\u001b[0m subfolder\u001b[38;5;241m=\u001b[39msubfolder,\n\u001b[0;32m 701\u001b[0m _commit_hash\u001b[38;5;241m=\u001b[39mcommit_hash,\n\u001b[0;32m 702\u001b[0m )\n\u001b[0;32m 703\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[0;32m 704\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m:\n\u001b[0;32m 705\u001b[0m \u001b[38;5;66;03m# Raise any environment 
error raise by `cached_file`. It will have a helpful error message adapted to\u001b[39;00m\n\u001b[0;32m 706\u001b[0m \u001b[38;5;66;03m# the original exception.\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\utils\\hub.py:453\u001b[0m, in \u001b[0;36mcached_file\u001b[1;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m revision \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 452\u001b[0m revision \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 453\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not appear to have a file named \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_filename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. 
Checkout \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/tree/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available files.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 456\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[0;32m 457\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m 458\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m _get_cache_file_to_return(path_or_repo_id, full_filename, cache_dir, revision)\n", "\u001b[1;31mOSError\u001b[0m: PlanTL-GOB-ES/es_anonimization_core_lg does not appear to have a file named config.json. Checkout 'https://huggingface.co/PlanTL-GOB-ES/es_anonimization_core_lg/tree/main' for available files." 
] } ], "source": [ "\n", "\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", "from transformers import pipeline\n", "tokenizer = AutoTokenizer.from_pretrained(\"PlanTL-GOB-ES/es_anonimization_core_lg\")\n", "model = AutoModelForTokenClassification.from_pretrained(\"PlanTL-GOB-ES/es_anonimization_core_lg\")\n", "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n", "\n", "ner_results = nlp(text)\n", "ner_results\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c51532f0-b5c5-43ad-a5a4-936a4876fd9f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }