{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "GzE_W3UhwV2_", "outputId": "add0f0f9-1722-40c0-e251-40768042e98d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting gradio\n", " Downloading gradio-3.24.1-py3-none-any.whl (15.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m34.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from gradio) (2.2.0)\n", "Collecting ffmpy\n", " Downloading ffmpy-0.3.0.tar.gz (4.8 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting python-multipart\n", " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 KB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (from gradio) (1.10.7)\n", "Collecting semantic-version\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Collecting websockets>=10.0\n", " Downloading websockets-11.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.5/129.5 KB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from gradio) (1.22.4)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from gradio) (1.4.4)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from gradio) (2.27.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from gradio) (3.1.2)\n", "Collecting uvicorn\n", " Downloading uvicorn-0.21.1-py3-none-any.whl (57 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.8/57.8 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pillow in /usr/local/lib/python3.9/dist-packages (from gradio) (8.4.0)\n", "Collecting aiofiles\n", " Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)\n", "Collecting aiohttp\n", " Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from gradio) (4.2.2)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.9/dist-packages (from gradio) (3.7.1)\n", "Collecting gradio-client>=0.0.5\n", " Downloading gradio_client-0.0.7-py3-none-any.whl (14 kB)\n", "Collecting fastapi\n", " Downloading fastapi-0.95.0-py3-none-any.whl (57 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.1/57.1 KB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting httpx\n", " Downloading httpx-0.23.3-py3-none-any.whl (71 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 KB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting huggingface-hub>=0.13.0\n", " Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.8/199.8 KB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting mdit-py-plugins<=0.3.3\n", " Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 KB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: markupsafe in /usr/local/lib/python3.9/dist-packages (from gradio) (2.1.2)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from gradio) (4.5.0)\n", "Collecting orjson\n", " Downloading orjson-3.8.9-cp39-cp39-manylinux_2_28_x86_64.whl (144 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.1/144.1 KB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.9/dist-packages (from gradio) (6.0)\n", "Collecting pydub\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio) (4.3.3)\n", "Requirement already satisfied: toolz in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio) (0.12.0)\n", "Requirement already satisfied: entrypoints in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio) (0.4)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.9/dist-packages (from gradio-client>=0.0.5->gradio) (2023.3.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from gradio-client>=0.0.5->gradio) (23.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.13.0->gradio) (3.10.7)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.13.0->gradio) (4.65.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.9/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio) (0.1.2)\n", "Collecting linkify-it-py<3,>=1\n", " Downloading linkify_it_py-2.0.0-py3-none-any.whl (19 kB)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->gradio) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->gradio) (2022.7.1)\n", "Collecting async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 KB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting yarl<2.0,>=1.0\n", " Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 KB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 KB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->gradio) (22.2.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->gradio) (2.0.12)\n", "Collecting starlette<0.27.0,>=0.26.1\n", " Downloading starlette-0.26.1-py3-none-any.whl (66 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 KB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from httpx->gradio) (2022.12.7)\n", "Collecting httpcore<0.17.0,>=0.15.0\n", " Downloading httpcore-0.16.3-py3-none-any.whl (69 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.6/69.6 KB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.9/dist-packages (from httpx->gradio) (1.3.0)\n", "Collecting rfc3986[idna2008]<2,>=1.3\n", " Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (1.4.4)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (0.11.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (3.0.9)\n", "Requirement already satisfied: importlib-resources>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (5.12.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (1.0.7)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio) (4.39.3)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->gradio) (1.26.15)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->gradio) (3.4)\n", "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn->gradio) (8.1.3)\n", "Collecting h11>=0.8\n", " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 KB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.9/dist-packages (from httpcore<0.17.0,>=0.15.0->httpx->gradio) (3.6.2)\n", "Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.9/dist-packages (from importlib-resources>=3.2.0->matplotlib->gradio) (3.15.0)\n", "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.9/dist-packages (from jsonschema>=3.0->altair>=4.2.0->gradio) (0.19.3)\n", "Collecting uc-micro-py\n", " Downloading uc_micro_py-1.0.1-py3-none-any.whl (6.2 kB)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas->gradio) (1.16.0)\n", "Building wheels for collected packages: ffmpy\n", " Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for ffmpy: filename=ffmpy-0.3.0-py3-none-any.whl size=4707 sha256=6c11ab449511a5b4400565cca1a34e683c2488a9714f17eccad6aec67d058c21\n", " Stored in directory: /root/.cache/pip/wheels/91/e2/96/f676aa08bfd789328c6576cd0f1fde4a3d686703bb0c247697\n", "Successfully built ffmpy\n", "Installing collected packages: rfc3986, pydub, ffmpy, websockets, uc-micro-py, semantic-version, python-multipart, orjson, multidict, h11, frozenlist, async-timeout, aiofiles, yarl, uvicorn, starlette, mdit-py-plugins, linkify-it-py, huggingface-hub, httpcore, aiosignal, httpx, gradio-client, fastapi, aiohttp, gradio\n", "Successfully installed aiofiles-23.1.0 aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 fastapi-0.95.0 ffmpy-0.3.0 frozenlist-1.3.3 gradio-3.24.1 gradio-client-0.0.7 h11-0.14.0 httpcore-0.16.3 httpx-0.23.3 huggingface-hub-0.13.3 linkify-it-py-2.0.0 mdit-py-plugins-0.3.3 multidict-6.0.4 orjson-3.8.9 pydub-0.25.1 python-multipart-0.0.6 rfc3986-1.5.0 semantic-version-2.10.0 starlette-0.26.1 uc-micro-py-1.0.1 uvicorn-0.21.1 websockets-11.0 yarl-1.8.2\n" ] } ], "source": [ "!pip install gradio" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XTmWAVdr4NKU", "outputId": "f0e73bd3-9798-4f22-85d6-d39ea7292061" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting transformers\n", " Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m58.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.3)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.10.7)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.27.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (6.0)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n", " Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2022.12.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (3.4)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2.0.12)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.15)\n", "Installing collected packages: tokenizers, transformers\n", "Successfully installed tokenizers-0.13.3 transformers-4.27.4\n" ] } ], "source": [ "!pip install transformers" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "85RiizMcwM6b", "outputId": "c4ff5378-2118-4296-c52e-829a79bfb846" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to\n", "[nltk_data] C:\\Users\\Asus\\AppData\\Roaming\\nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n", "[nltk_data] Downloading package stopwords to\n", "[nltk_data] C:\\Users\\Asus\\AppData\\Roaming\\nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] } ], "source": [ "import gradio as gr\n", "import pandas as pd\n", "from torch import nn\n", "from transformers import BertModel\n", "from transformers import BertTokenizer\n", "from sklearn.metrics import f1_score\n", "import torch\n", "import nltk\n", "nltk.download(['punkt', 'stopwords'])\n", "import re" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "VHMiihOCwvEY" }, "outputs": [], "source": [ "def remove_short_strings(df:pd.DataFrame, string_column:str)->pd.DataFrame:\n", " df[string_column] = df[string_column].astype(str)\n", " df['length'] = df[string_column].str.len()\n", " df = df.drop(df[df['length'] == 1].index)\n", " df = df.drop(columns=['length'])\n", " return df\n", "def remove_one_character_words(row):\n", " words = row['text'].split()\n", " return ' '.join([word for word in words if len(word) > 1])\n", "def ret_list_to_str(liste):\n", " return \" \".join (i for i in liste)\n", "def preprocess_tweet(tweet):\n", " # Convert to lower case\n", " tweet = tweet.lower() \n", " # Replace repeating characters\n", " tweet = re.sub(r'(.)\\1+', r'\\1\\1', tweet)\n", " # Remove non-Turkish characters\n", " tweet = re.sub(r'[^a-zA-ZçÇğĞıİöÖşŞüÜ\\s]', '', tweet)\n", " # Remove extra whitespaces\n", " tweet = re.sub(r'\\s+', ' ', tweet).strip()\n", " return tweet\n", "def cleaning_stopwords(text,stop_words):\n", " return \" \".join([word for word in str(text).split() if word not in stop_words])\n", "from nltk.corpus import stopwords\n", "# Türkçe stop words\n", "turkish_stopwords = stopwords.words('turkish')\n", "turkish_stopwords.append(\"bir\")\n", "turkish_stopwords=set(turkish_stopwords)\n", " ##burada saçma kelimeler var bunu kullanmayalım \n", "\n", "\n", "from sklearn import preprocessing\n", "from nltk.tokenize import word_tokenize\n", "\n", "\n", "def prep_and_sw_and_tokenize(df):\n", "\n", " turkish_stopwords = stopwords.words('turkish')\n", " turkish_stopwords.append(\"bir\")\n", " stop_words=set(turkish_stopwords)\n", " df[\"text\"]=df[\"text\"].apply(preprocess_tweet)\n", " df['text'] = df[\"text\"].apply(lambda text: cleaning_stopwords(text,stop_words))\n", "\n", " #df['text'] = df.apply(remove_one_character_words, axis=1)\n", "\n", "\n", " return df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 113, "referenced_widgets": [ "5f331d1562aa4655a7513ebe96e8e543", "5cbc3b58ceb7477aaaf2bb6198f761ae", "a75a8d3737c0495fa94b0d37a7ac7cb2", "fc163ea867554162918e86086bf16346", "16e1c6eff9434dccbe888bf31e846cb3", "6e2b50925eba416d977a515a96253c8d", "66ce6f6abe62413cafc531ddeab1b234", "4c444591789a439ea012c19b9a65943c", "e95963b069c0430493068fccad39e549", "05ba2899712c41dfbe19b47e310d6f11", "f36aadfead2f4ea5bffc5380d1389f83", "f7dcb80ead674ea2acca13be918cf29f", "6f6bb49fc5cb49b2b61d5da84a85df48", "693f00434dd34f6ebbbd3454e20e6f09", "182030415ce843bfb55c450688f1173c", "26489266a0af48769920239061ea348c", "44b676a38c90434c92230d740d8d953b", "00a05a5b259d4e9d8219d18f1b6fd9fe", "a1e9be8d15fa48bda098fca408cb1dc3", "a6aad0e220554fafabc82b1578ea399b", "2a6fbcb692504742bd5382bffda82d84", "fb886858d4ad4ac0a1d9e8e56941b246", "64a97722956b42e78978c674320bffc4", "43c825ca23ee4fa4818012fd0d70e7a0", "e38e3d64e0f94ff28ce00b145aab4b7d", "e70480d3a9d74488b8412057b1c112e7", "75f16475449a4af69679a624e7d80b72", "b4eb927b4c0143cab3346d4521245103", "c4325e65d0b04cc0bce40f6f4a273eb2", "90d1288635e84948b23e428c59c140e8", "a89f2e52c5ab4210915eec5bb8a67162", "fd086c70086d445b9152c65db68ffa40", "3c18c7c120c2428595d75cd10bc59ad4" ] }, "id": "P75bE83_xJEt", "outputId": "9db8a1e1-33ca-47fc-875f-ce13480e4209" }, "outputs": [], "source": [ "\n", "tokenizer = BertTokenizer.from_pretrained(\"dbmdz/bert-base-turkish-128k-uncased\")\n", "class BertClassifierConv1D(nn.Module):\n", " def __init__(self, dropout=0.5, num_classes=5):\n", " super(BertClassifierConv1D, self).__init__()\n", " \n", " self.bert = BertModel.from_pretrained('dbmdz/bert-base-turkish-128k-uncased', return_dict=True)\n", " self.conv1d = nn.Conv1d(in_channels=self.bert.config.hidden_size, out_channels=128, kernel_size=5)\n", " self.bilstm = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, bidirectional=True, batch_first=True)\n", " self.dropout = nn.Dropout(dropout)\n", " self.linear = nn.Linear(128, num_classes)\n", "\n", " def forward(self, input_id, mask):\n", " output = self.bert(input_ids=input_id, attention_mask=mask).last_hidden_state\n", " output = output.permute(0, 2, 1) # swap dimensions to prepare for Conv1d layer\n", " output = self.conv1d(output)\n", " output, _ = self.bilstm(output.transpose(1, 2))\n", " output = self.dropout(output)\n", " output = self.linear(output.mean(dim=1))\n", " return output\n", "class Dataset(torch.utils.data.Dataset):\n", " def __init__(self, df):\n", " self.texts = [tokenizer(text, padding='max_length', max_length=512, truncation=True, return_tensors=\"pt\") for text in df]\n", "\n", " def __len__(self):\n", " return len(self.texts)\n", "\n", " def __getitem__(self, idx):\n", " batch_texts = self.texts[idx]\n", " return batch_texts\n", "def evaluate(model, test_data):\n", "\n", " test = Dataset(test_data)\n", "\n", " test_dataloader = torch.utils.data.DataLoader(test, batch_size=32)\n", "\n", " #use_cuda = torch.cuda.is_available()\n", " #device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", " device= torch.device(\"cpu\")\n", "\n", " #if use_cuda:\n", "\n", " # model = model.cuda()\n", "\n", " total_acc_test = 0\n", " output_indices = []\n", " with torch.no_grad():\n", "\n", " for test_input in test_dataloader:\n", "\n", " mask = test_input['attention_mask'].to(device)\n", " input_id = test_input['input_ids'].squeeze(1).to(device)\n", "\n", " output = model(input_id, mask)\n", " \n", "\n", " batch_indices = output.argmax(dim=1).tolist()\n", " output_indices.extend(batch_indices)\n", "\n", " \n", " \n", " return output_indices\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "l_Gfr3A_wSBm" }, "outputs": [], "source": [ "def auth(username, password):\n", " if username == \"Hive_Hereos\" and password == \"Y2IB3HV8GBXED00S\":\n", " return True\n", " else:\n", " return False\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "global model\n", "model =BertClassifierConv1D()\n", "\n", "model.load_state_dict(torch.load(r\"sontotalmodel_finallll.pt\", map_location=torch.device('cpu')))\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "TafHYRYDwcdn" }, "outputs": [], "source": [ "import logging\n", "logging.basicConfig(filename=r'app.log', filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)\n", "\n", "\n", "def predict(df):\n", " # TODO:\n", " df[\"offensive\"] = 1 \n", " df[\"target\"] = None\n", " # ***************************\n", " try:\n", " # WRITE YOUR INFERENCE STEPS BELOW # HERE\n", " text=df[\"text\"]\n", " df=prep_and_sw_and_tokenize(df)\n", " #df.to_csv(\"preprocess.csv\", index=False, sep=\"|\")\n", " labels = {'INSULT':0,\n", " 'OTHER':1,\n", " 'PROFANITY':2,\n", " 'RACIST':3,\n", " 'SEXIST':4\n", " }\n", " logging.info(\"Başlıyoruz\")\n", " \n", " logging.info(\"Model yüklendi\")\n", " logging.info(df.text)\n", " a=evaluate(model, df[\"text\"])\n", " \n", " test_labels=[]\n", " for number in a:\n", " label = list(labels.keys())[list(labels.values()).index(number)] # Sayıyı etikete dönüştürüyoruz.\n", " test_labels.append(label) # Yeni etiketi listeye ekliyoruz.\n", " df[\"target\"]=test_labels\n", " \n", " for index, row in df.iterrows():\n", " if row['target'] == 'OTHER':\n", " df.at[index, 'offensive'] = 0\n", " df[\"text\"]=text\n", " except Exception as e:\n", " logging.error(\"Error occurred\", exc_info=True) \n", " raise e\n", " #\n", " # *********** END ***********\n", "\n", "\n", " return df\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "tFwQjDPowfWs" }, "outputs": [], "source": [ "def get_file(file):\n", " output_file = \"output_Hive_Hereos.csv\"\n", "\n", " # For windows users, replace path seperator\n", " file_name = file.name.replace(\"\\\\\", \"/\")\n", "\n", " df = pd.read_csv(file_name, sep=\"|\")\n", "\n", " predict(df)\n", " df.to_csv(output_file, index=False, sep=\"|\")\n", " return (output_file)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PQI8hfLxwjEX", "outputId": "198e3795-5917-4a91-ed21-8ebccb0da373", "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7860\n", "Running on public URL: https://d7857c2e67677f4433.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" ] } ], "source": [ "# Launch the interface with user password\n", "iface = gr.Interface(get_file, \"file\", \"file\")\n", "\n", "if __name__ == \"__main__\":\n", " iface.launch(share=True, auth=auth,debug=True)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "id": "pdgqxuRE5Gyr" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Closing server running on port: 7860\n" ] } ], "source": [ "iface.close()" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(r\"C:\\Users\\Asus\\Desktop\\teknofest\\codes\\datas\\teknofest_train_final.csv\", sep=\"|\")\n", "df=df[1:16]\n", "df=df[[\"id\",\"text\"]]\n", "df.to_csv(r\"C:\\Users\\Asus\\Desktop\\teknofest\\codes\\datas\\deneme_test.csv\", index=False, sep=\"|\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import session_info\n", "session_info.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "00a05a5b259d4e9d8219d18f1b6fd9fe": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "05ba2899712c41dfbe19b47e310d6f11": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "16e1c6eff9434dccbe888bf31e846cb3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "182030415ce843bfb55c450688f1173c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2a6fbcb692504742bd5382bffda82d84", "placeholder": "​", "style": "IPY_MODEL_fb886858d4ad4ac0a1d9e8e56941b246", "value": " 59.0/59.0 [00:00<00:00, 588B/s]" } }, "26489266a0af48769920239061ea348c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2a6fbcb692504742bd5382bffda82d84": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3c18c7c120c2428595d75cd10bc59ad4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "43c825ca23ee4fa4818012fd0d70e7a0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b4eb927b4c0143cab3346d4521245103", "placeholder": "​", "style": "IPY_MODEL_c4325e65d0b04cc0bce40f6f4a273eb2", "value": "Downloading (…)lve/main/config.json: 100%" } }, "44b676a38c90434c92230d740d8d953b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4c444591789a439ea012c19b9a65943c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5cbc3b58ceb7477aaaf2bb6198f761ae": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6e2b50925eba416d977a515a96253c8d", "placeholder": "​", "style": "IPY_MODEL_66ce6f6abe62413cafc531ddeab1b234", "value": "Downloading (…)solve/main/vocab.txt: 100%" } }, "5f331d1562aa4655a7513ebe96e8e543": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_5cbc3b58ceb7477aaaf2bb6198f761ae", "IPY_MODEL_a75a8d3737c0495fa94b0d37a7ac7cb2", "IPY_MODEL_fc163ea867554162918e86086bf16346" ], "layout": "IPY_MODEL_16e1c6eff9434dccbe888bf31e846cb3" } }, "64a97722956b42e78978c674320bffc4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_43c825ca23ee4fa4818012fd0d70e7a0", "IPY_MODEL_e38e3d64e0f94ff28ce00b145aab4b7d", "IPY_MODEL_e70480d3a9d74488b8412057b1c112e7" ], "layout": "IPY_MODEL_75f16475449a4af69679a624e7d80b72" } }, "66ce6f6abe62413cafc531ddeab1b234": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "693f00434dd34f6ebbbd3454e20e6f09": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a1e9be8d15fa48bda098fca408cb1dc3", "max": 59, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a6aad0e220554fafabc82b1578ea399b", "value": 59 } }, "6e2b50925eba416d977a515a96253c8d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6f6bb49fc5cb49b2b61d5da84a85df48": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_44b676a38c90434c92230d740d8d953b", "placeholder": "​", "style": "IPY_MODEL_00a05a5b259d4e9d8219d18f1b6fd9fe", "value": "Downloading (…)okenizer_config.json: 100%" } }, "75f16475449a4af69679a624e7d80b72": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "90d1288635e84948b23e428c59c140e8": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a1e9be8d15fa48bda098fca408cb1dc3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a6aad0e220554fafabc82b1578ea399b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a75a8d3737c0495fa94b0d37a7ac7cb2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4c444591789a439ea012c19b9a65943c", "max": 1233088, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e95963b069c0430493068fccad39e549", "value": 1233088 } }, "a89f2e52c5ab4210915eec5bb8a67162": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b4eb927b4c0143cab3346d4521245103": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c4325e65d0b04cc0bce40f6f4a273eb2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e38e3d64e0f94ff28ce00b145aab4b7d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_90d1288635e84948b23e428c59c140e8", "max": 386, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a89f2e52c5ab4210915eec5bb8a67162", "value": 386 } }, "e70480d3a9d74488b8412057b1c112e7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_fd086c70086d445b9152c65db68ffa40", "placeholder": "​", "style": "IPY_MODEL_3c18c7c120c2428595d75cd10bc59ad4", "value": " 386/386 [00:00<00:00, 17.3kB/s]" } }, "e95963b069c0430493068fccad39e549": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f36aadfead2f4ea5bffc5380d1389f83": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f7dcb80ead674ea2acca13be918cf29f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6f6bb49fc5cb49b2b61d5da84a85df48", "IPY_MODEL_693f00434dd34f6ebbbd3454e20e6f09", "IPY_MODEL_182030415ce843bfb55c450688f1173c" ], "layout": "IPY_MODEL_26489266a0af48769920239061ea348c" } }, "fb886858d4ad4ac0a1d9e8e56941b246": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fc163ea867554162918e86086bf16346": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_05ba2899712c41dfbe19b47e310d6f11", "placeholder": "​", "style": "IPY_MODEL_f36aadfead2f4ea5bffc5380d1389f83", "value": " 1.23M/1.23M [00:00<00:00, 6.07MB/s]" } }, "fd086c70086d445b9152c65db68ffa40": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 1 }