{ "metadata": { "kernelspec": { "language": "python", "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python", "version": "3.7.12", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "provenance": [], "include_colab_link": true } }, "nbformat_minor": 0, "nbformat": 4, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "source": [ "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session\n", "\n", "import numpy as np \n", "import pandas as pd " ], "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "execution": { "iopub.status.busy": "2022-10-28T02:46:40.023210Z", "iopub.execute_input": "2022-10-28T02:46:40.024111Z", "iopub.status.idle": "2022-10-28T02:46:40.047013Z", "shell.execute_reply.started": "2022-10-28T02:46:40.024018Z", "shell.execute_reply": "2022-10-28T02:46:40.046119Z" }, "trusted": true, "id": "V06UUnQa1Cjt" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "train_df = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')\n", "display(len(train_df))\n", "display(train_df.head(3))\n", "train_annot_df = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train_annotations.csv')\n", "display(len(train_annot_df))\n", "display(train_annot_df.head(3))" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:46:44.475857Z", "iopub.execute_input": "2022-10-28T02:46:44.476564Z", "iopub.status.idle": "2022-10-28T02:46:44.724851Z", 
"shell.execute_reply.started": "2022-10-28T02:46:44.476517Z", "shell.execute_reply": "2022-10-28T02:46:44.723861Z" }, "trusted": true, "id": "McQ0e-rw1Cjv", "outputId": "c0aa2a6f-d842-4a3d-fb22-3b6ae1470b5f" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "30083" }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": " StudyInstanceUID ETT - Abnormal \\\n0 1.2.826.0.1.3680043.8.498.26697628953273228189... 0 \n1 1.2.826.0.1.3680043.8.498.46302891597398758759... 0 \n2 1.2.826.0.1.3680043.8.498.23819260719748494858... 0 \n\n ETT - Borderline ETT - Normal NGT - Abnormal NGT - Borderline \\\n0 0 0 0 0 \n1 0 1 0 0 \n2 0 0 0 0 \n\n NGT - Incompletely Imaged NGT - Normal CVC - Abnormal CVC - Borderline \\\n0 0 1 0 0 \n1 1 0 0 0 \n2 0 0 0 1 \n\n CVC - Normal Swan Ganz Catheter Present PatientID \n0 0 0 ec89415d1 \n1 1 0 bf4c6da3c \n2 0 0 3fc1c97e5 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
StudyInstanceUIDETT - AbnormalETT - BorderlineETT - NormalNGT - AbnormalNGT - BorderlineNGT - Incompletely ImagedNGT - NormalCVC - AbnormalCVC - BorderlineCVC - NormalSwan Ganz Catheter PresentPatientID
01.2.826.0.1.3680043.8.498.26697628953273228189...00000010000ec89415d1
11.2.826.0.1.3680043.8.498.46302891597398758759...00100100010bf4c6da3c
21.2.826.0.1.3680043.8.498.23819260719748494858...000000001003fc1c97e5
\n
" }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": "17999" }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": " StudyInstanceUID label \\\n0 1.2.826.0.1.3680043.8.498.12616281126973421762... CVC - Normal \n1 1.2.826.0.1.3680043.8.498.12616281126973421762... CVC - Normal \n2 1.2.826.0.1.3680043.8.498.72921907356394389969... CVC - Borderline \n\n data \n0 [[1487, 1279], [1477, 1168], [1472, 1052], [14... \n1 [[1328, 7], [1347, 101], [1383, 193], [1400, 2... \n2 [[801, 1207], [812, 1112], [823, 1023], [842, ... ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
StudyInstanceUIDlabeldata
01.2.826.0.1.3680043.8.498.12616281126973421762...CVC - Normal[[1487, 1279], [1477, 1168], [1472, 1052], [14...
11.2.826.0.1.3680043.8.498.12616281126973421762...CVC - Normal[[1328, 7], [1347, 101], [1383, 193], [1400, 2...
21.2.826.0.1.3680043.8.498.72921907356394389969...CVC - Borderline[[801, 1207], [812, 1112], [823, 1023], [842, ...
\n
" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# value counts\n", "train_df.drop(columns=['StudyInstanceUID','PatientID']).agg(['sum'])\n", "# unbalanced dataset" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:46:59.167135Z", "iopub.execute_input": "2022-10-28T02:46:59.167596Z", "iopub.status.idle": "2022-10-28T02:46:59.208167Z", "shell.execute_reply.started": "2022-10-28T02:46:59.167559Z", "shell.execute_reply": "2022-10-28T02:46:59.207260Z" }, "trusted": true, "id": "NdNcRIOP1Cjx", "outputId": "e602d509-882f-4f7b-a237-eb0f5a2af756" }, "execution_count": null, "outputs": [ { "execution_count": 3, "output_type": "execute_result", "data": { "text/plain": " ETT - Abnormal ETT - Borderline ETT - Normal NGT - Abnormal \\\nsum 79 1138 7240 279 \n\n NGT - Borderline NGT - Incompletely Imaged NGT - Normal \\\nsum 529 2748 4797 \n\n CVC - Abnormal CVC - Borderline CVC - Normal \\\nsum 3195 8460 21324 \n\n Swan Ganz Catheter Present \nsum 830 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ETT - AbnormalETT - BorderlineETT - NormalNGT - AbnormalNGT - BorderlineNGT - Incompletely ImagedNGT - NormalCVC - AbnormalCVC - BorderlineCVC - NormalSwan Ganz Catheter Present
sum7911387240279529274847973195846021324830
\n
" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:47:01.769145Z", "iopub.execute_input": "2022-10-28T02:47:01.769618Z", "iopub.status.idle": "2022-10-28T02:47:02.507132Z", "shell.execute_reply.started": "2022-10-28T02:47:01.769578Z", "shell.execute_reply": "2022-10-28T02:47:02.506044Z" }, "trusted": true, "id": "cONSDexl1Cjy" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# value counts\n", "train_df.drop(columns=['StudyInstanceUID','PatientID']).agg(['sum']).T.sort_values(by='sum').plot(kind='barh')\n", "plt.legend(loc='lower right');" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:47:04.180989Z", "iopub.execute_input": "2022-10-28T02:47:04.181680Z", "iopub.status.idle": "2022-10-28T02:47:04.493106Z", "shell.execute_reply.started": "2022-10-28T02:47:04.181644Z", "shell.execute_reply": "2022-10-28T02:47:04.491889Z" }, "trusted": true, "id": "rd4yEDyL1Cjz", "outputId": "738805db-8fab-48fe-81de-ece60953337c" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfIAAAD4CAYAAAAXfWQCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAvf0lEQVR4nO3de5xVdb3/8ddbQPCKNyLMjmPl5YgowngrMUxT0cpLFhjmtTh17GYHk7JjdDodLbM8pmkcf4h6TMxbh8K838j7DLcBFQvFEg1FEwUFcfj8/ljfrYvNzJ49M3tmWPp+Ph77wVrf9V3f72etGeaz13etvb+KCMzMzKyYNujpAMzMzKzjnMjNzMwKzInczMyswJzIzczMCsyJ3MzMrMB693QA9t6zzTbbRF1dXU+HYWZWKI2NjUsjYkB5uRO5dbu6ujoaGhp6Ogwzs0KR9ExL5R5aNzMzKzAncjMzswJzIjczMysw3yO3bte0eBl1E6b3dBhmZt1q0blHdEm7viI3MzMrMCfyGpP0fklTJS2U1CjpZkk7SXpK0s5ldS+QdGZa3lvSfZIWSJol6TJJG7ej34mSXpf0vlzZ8todWVUxjJT0h+7s08zsvc6JvIYkCbgJuCciPhwRw4HvAgOBqcCYXN0NgGOBqZIGAtcBZ0bEzhGxJ3ALsFk7Q1gK/FtHY08xmZlZgfgPd20dCKyOiEtLBRExJyJmANcAo3N1DwCeiYhngNOAKyLiwdx+10fEknb2PxkYLWmr8g2Svi1pXnp9K5XVpRGAK4F5wAhJT0iaIulJSVdLOljS/ZL+LGnvtN/ekh5MIwcPlI80mJlZ93Eir63dgMaWNkREE7BG0h6paAxZcq+4XzstJ0vm38wXShoOnAzsA+wLfFnSnmnzjsCvImIw8AzwEeB8YJf0+gKwPzAe+F7a5wlgRBo5OBv4r7YCkzROUoOkhubXl3XqIM3M7B1O5N3rGmCMpN7AUWTD6bV2IXCipPyw/P7ATRGxIiKWAzcCI9K2ZyLioVzdpyOiKSLWAPOBOyMigCagLtXpD1wnaR7wC2BwW0FFxKSIqI+I+l4b9+/M8ZmZWY4TeW3NB4ZX2D4V+DxwMDA3N3Te1n4ASLpc0mxJN7dWJyJeAX5DNlxfjRVl66tyy2ty62t45+OKPwLujojdgE8D/arsy8zMasyJvLbuAvpKGlcqkLS7pBEAEbGQ7IG0c3lnWB3gIrKr6H1y+x2THoJ7W0ScHBFDI+LwNuL4OfAvvJN4ZwBHSdpY0ibA0amso/oDi9PySZ1ox8zMOsmJvIbSEPTRwMHp42fzgXOAv+eqXUN27/nG3H5LyO6Z/yw9fPY4cCjwWgfjWEr29HzftD4TmAI8AjwMXBYRszrSdvJT4BxJs/CXCpmZ9Shluces+9TX14dnPzMzax9JjRFRX17uK3IzM7MCcyI3MzMrMCdyMzOzAnMiNzMzKzAncjMzswJzIjczMyswJ3IzM7MCcyI3MzMrMCdyMzOzAvPXa1q3a1q8jLoJ03s6DOtmi849oqdDMHtX8hW5mZlZgTmRt0DS+yVNTROfNEq6WdJOkp6StHNZ3QsknZmW95Z0X5r4ZJakyyRt3I5+J0panKYqfULSJZI6/DOSVJfmDG/vfsvTv9tKur6j/ZuZWddzIi8jSWQzh90TER+OiOHAd4GBZPOJj8nV3QA4Fpiaphy9DjgzInaOiD2BW4DN2hnCLyJiKLArMAT4eDti79StkvL9I+K5iDi2M22amVnXciJf14HA6oi4tFQQEXMiYgbZFKSjc3UPAJ6JiGeA04ArIuLB3H7XpylKO2JDoB/wDwBJQyU9JGmupJskbZnK70mjAg3ANyUNlzRH0pwUE6leL0nnSXo0tfEvqXykpBmSpgGP5QPIX9FLOknSjZJukfRnST/N1TtE0oOSZkq6TtKmHTxmMzNrJyfyde0GNLa0ISKagDWS9khFY8iSe8X92ul0SbOB54E
nI2J2Kr+S7Gp/d6AJ+EFunw0joj4izgcuB74eEXuwtlOBZRGxF7AX8GVJO6Rtw4BvRsRObcQ2lOyNzBBgtKQPStoG+D5wcEQMAxqAb5fvKGmcpAZJDc2vL2v7LJiZWVWcyNvvGmBMGoY+imw4vZZKQ+vvAzaRNEZSf2CLiLg31bmCbDSg5FoASVukevel8qtydQ4BTkhvEh4GtgZ2TNseiYinq4jtzohYFhErya7etwf2JbsNcH9q+8RUvpaImJTebNT32rh/FV2ZmVk1/PGzdc0nu+/dmqnAbcC9wNzc0Pl8YDjwf5Ual3Q5sCfwXEQc3lq9iFgt6RayhP3HNmJe0cZ2AJFdqd9aFs/IKvcHWJVbbib7/RFwe0QcV2UbZmZWQ74iX9ddQF9J40oFknaXNAIgIhYCS4FzeWdYHeAi4ERJ++T2OyY9BPe2iDg5IoZWSuJpXwEfAxZGxDLgH6UYgC+SvZFYS0S8Arwiaf9UNDa3+Vbgq5L6pPZ3krRJpRiq9BDwMUkfSe1uIqmtIXozM6sRJ/IyERHA0cDB6eNn84FzgL/nql0D7ALcmNtvCdk985+lj589DhwKvNbOEEr3yOcBvYBfpfITgfMkzSW7V/0frex/MnBxakO58svIhsNnpgfYfk0NRmQi4kXgJOCaFNuDZOfGzMy6gbK8ZdZ96uvro6GhoafDMDMrFEmNEVFfXu4rcjMzswJzIjczMyswJ3IzM7MCcyI3MzMrMCdyMzOzAnMiNzMzKzAncjMzswJzIjczMyswJ3IzM7MC86Qp1u2aFi+jbsL0ng7DqrTo3CN6OgQzq8BX5GZmZgXmK/IuJKkZaMoVTQX2AXYANgUGAKV5wAeTTYW6FbARsDiVHxURi6rsL4CfR8S/pfXxwKYRMbFTB9IOku4BxkeEv0zdzKwbOJF3rTciYmhLG9I84OMj4lNl5ScB9RHxtQ70two4RtI5EbG0vTtL6h0Rb3WgXzMz6yFO5O8ubwGTgNOBs/IbJNUBk4FtgBeBkyPir5KmACuBPYH7JW0FvJHW3wecApwA7Ac8HBEnpfYuAfYiGz24PiJ+0MXHZmZmLfA98q61kaTZudfobujzYmCspP5l5b8EroiI3YGrgQtz27YDPhoR307rW5Il7tOBacAvyIb+h0gamuqclabT2x34uKTdKwUlaZykBkkNza8v6/jRmZnZWnxF3rVaHVrvKhHxqqQrgW+QXVmX7Acck5avAn6a23ZdRDTn1n8fESGpCVgSEU0AkuYDdcBs4POSxpH9Dg0CdgXmVohrEtloAX0H7RgdPkAzM1uLr8gLRNI+uav7z1SoegFwKrBJlU2vKFtflf5dk1surfeWtAMwHjgoXeFPB/pV2ZeZmdWQE3mBRMTDETE0vaZVqPcy8FuyZF7yADAmLY8FZnQilM3Jkv8ySQOBUZ1oy8zMOsFD611rI0mzc+u3RMSEbur7fCD/5PvXgcslnUF62K2jDUfEHEmzgCeAvwH3dyZQMzPrOEX4dqV1r/r6+mho8MfMzczaQ1Jjesh4LR5aNzMzKzAncjMzswJzIjczMyswJ3IzM7MCcyI3MzMrMCdyMzOzAnMiNzMzKzAncjMzswJzIjczMyswf0WrdbumxcuomzC9p8NYby0694ieDsHMCsRX5DUmKSSdn1sfL2libv14SXMlzZc0R9JlkkbkZjV7WdLTafmOdvQ7MvX96VzZHySNrNGhVRvH8u7sz8zsvc6JvPZWAcdI2qZ8g6TDgNOBURExGBhGNivZC6VZzYBpwBlp/eB29v0scFZHA5fUq6P7mplZz3Air723gElkCbvcWcD4iFgMEBHNETE5IhbUqO85ZFOLfrJ8g6SDJM2S1CRpsqS+qXyRpJ9Imgl8Lq2fk0YEGiQNk3SrpIWSvpL22VTSnZJmpvaOrFH8ZmbWTk7kXeNiYKyk/mXlg4GZXdz3j4Hv5wsk9QOmAKMjYgjZsxFfzVV5KSKGRcTUtP7XNDowI+13LLA
v8MO0fSVwdEQMAw4EzpekLjkaMzOryIm8C0TEq8CVwDdaqyNpSLrqXShpdA37vi+1v3+ueGfg6Yh4Mq1fARyQ235tWTPT0r9NwMMR8VpEvAiskrQFIOC/JM0F7gA+AAysFJekcekKv6H59WUdODIzM2uJE3nXuQA4FdgkVzaf7L44EdGUrnr/CGxUTYOSTss9FLdtharrXJW3YUXZ+qr075rccmm9NzAWGAAMT8ewBOhXqYOImBQR9RFR32vj8oEKMzPrKCfyLhIRLwO/JUvmJecAP5O0Xa6sqiSe2ry49FBcRDxXod5twJbA7qloAVAn6SNp/YvAvdX224L+ZA/orZZ0ILB9J9oyM7NOcCLvWucDbz+9HhE3AxcCf5T0mKQHgGbg1i7o+8fAB1O/K4GTgeskNZFdWV/aibavBupTWycAT3QyVjMz6yBFRE/HYO8x9fX10dDQ0NNhmJkViqTGiKgvL/cVuZmZWYE5kZuZmRWYE7mZmVmBOZGbmZkVmBO5mZlZgTmRm5mZFZgTuZmZWYE5kZuZmRWYE7mZmVmB9e7pAOy9p2nxMuomTO/pMDps0blH9HQIZmZv8xW5mZlZgTmRt0HS+yVNTfOGN0q6WdJOkp6StHNZ3QsknZmW95Z0n6QFkmZJukzSxh3of7akqWVl90ha5/t2e5qkkZL+0NNxmJm9lziRVyBJwE3APRHx4YgYDnwXGAhMBcbk6m4AHAtMlTQQuA44MyJ2jog9gVuAzdrZ/z8DvYARkjZpq35nKOPfBzOzgvEf7soOBFZHxNtTfkbEnIiYAVwDjM7VPQB4JiKeAU4DroiIB3P7XR8RS9rZ/3HAVcBtwJFl276YrtbnSdobQNJESZPTFftTkr5Rqizp26nuPEnfSmV1acTgSmAe2RuGJyRNkfSkpKslHSzpfkl/zvWzt6QH00jDA+UjE2Zm1n2cyCvbDWhsaUNENAFrJO2RisaQJfeK+7XTaLIr/2vIknrexhExFPhXYHKufBfgUGBv4AeS+kgaTjYf+T7AvsCXJe2Z6u8I/CoiBgPPAB8hm0d9l/T6ArA/MB74XtrnCWBEGmk4G/ivtg5E0jhJDZIaml9fVv0ZMDOzipzIO+caYIyk3sBRZMPpNZHugS+NiL8CdwJ7StqqrG8i4j5gc0lbpPLpEbEqIpYCL5DdBtgfuCkiVkTEcuBGYESq/0xEPJRr9+mIaIqINcB84M7IJq1vAupSnf7AdZLmAb8ABrd1PBExKSLqI6K+18b923cyzMysVU7klc0HhlfYPhX4PHAwMDc3dN7WfgBIujwNj9/cwubjgF0kLQIWApsDn81tj7L6pfVVubJm2v6I4Yqy9fz+a3Lra3Jt/Qi4OyJ2Az4N9GujDzMz6yJO5JXdBfSVNK5UIGl3SSMAImIhsBQ4l3eG1QEuAk6UtE9uv2PSQ3Bvi4iTI2JoRByeL08PnX0eGBIRdRFRR3aPPD+8PjrV3R9YFhGVxqtnAEdJ2jg9NHd0Kuuo/sDitHxSJ9oxM7NOciKvIA0pHw0cnD5+Nh84B/h7rto1ZPeSb8ztt4TsnvnP0sNkj5Pdt36tyq5HAIsj4rlc2X3ArpIGpfWVkmYBlwKntnEcM4EpwCPAw8BlETGrylha8lPgnNS/v1TIzKwHKctVZt2nvr4+GhoaejoMM7NCkdQYEet8h4ivyM3MzArMidzMzKzAnMjNzMwKzInczMyswJzIzczMCsyJ3MzMrMCcyM3MzArMidzMzKzAnMjNzMwKzF+vad2uafEy6iZM7+kwKlp07hE9HYKZWVV8RW5mZlZgnUrkkkLS+bn18ZIm5taPlzRX0nxJcyRdJmlEmrpztqSXJT2dlu9oR78jJf2hM7HXmqQpko5to85JkratRVtl9e9J85f3KEl1aY5yMzPrJp29Il8FHCNpm/INkg4DTgdGRcRgYBjwAPBCmrpzKDANOCOtH9zJWIrgJKDNRG5mZlatzibyt4BJZAm73FnA+IhYDBA
RzRExOSIWdLLPtUiaKGlyuip9StI3cttOSCMCcyRdlcrqJN2Vyu+U9E+pfIqkSyQ9lNoZmdp9XNKUXJvLJf0ijTLcKWlACzENl3SvpEZJt0oalK6w64Gr0wjERi3VK2vnE5J+l1v/pKSb2jgfyyWdl+K7Q9LeuXPzmdw5mCFpZnp9NJVvIOlXkp6QdLukm0sjA63FmsrnSJoDnNauH56ZmXVaLe6RXwyMldS/rHwwMLMG7VdjF7L5vvcGfiCpj6TBwPeBT0TEHsA3U91fAldExO7A1cCFuXa2BPYje2MyDfgF2XEMkTQ01dkEaEijDPcCP8gHIqlP6uPYiBgOTAZ+HBHXAw3A2DQa8VZL9cqO625gl9ybhZNTvUo2Ae5K8b0G/CfwSbJ51f8j1XkB+GREDANG587BMUAdsCvwxXQuWj2mtM/lwNfTOW6VpHGSGiQ1NL++rI1DMDOzanX6qfWIeFXSlcA3gDdaqiNpCHAVsBnwvYi4trP9lpkeEauAVZJeAAYCnwCui4ilKc6XU939yBIWKaaf5tr5fUSEpCZgSUQ0pfjnkyW42cAaoBT//wI3lsWyM7AbcLskgF7A8y3E3Ga9FMtVwPGSLk+xn9DGuXgTuCUtNwGrImJ1Oqa6VN4HuCi9OWkGdkrl+5OdszXA3yXdXSlWSVsAW0TEfaneVcColoKKiElkozf0HbRjtHEMZmZWpVp9/OwCsqvvy3Nl88nui9+dEuJQSRcBG1XToKTTgC+n1cMj4rkK1Vfllpvp+HGV2llT1uaaCm2WJyUB8yNivzb6qrbe5cDvgZVkSfatNuqvjohSTG8fR0SskVQ6htOBJcAeZKMyKzsSa0rkZmbWg2ry8bN0tftb4NRc8TnAzyRtlyurKomnNi8uPRTXRhJvzV3A5yRtDSBpq1T+ADAmLY8FZrSz3Q2A0hPlXwD+VLZ9ATBA0tvD0mmYH7Kh7s2qqPe2dOzPkd0muLx8ewf1B55PV95fJLvCBrgf+Gy6Vz4QGFkp1oh4BXhF0v6p3tgaxWdmZlWq5efIzwfefno9Im4mu/f6R0mPSXqA7Gr51hr22aqImE92H/fe9CDWz9OmrwMnS5pLlsS+2UoTrVkB7J0+ZvUJ3rnvXOr3TbJE/5PU72zgo2nzFOBSSbPJkmdr9cpdDfwtIh5vZ6yt+RVwYup3l3RMADcAzwKPkd02mAksa+OYTgYuTsekGsVnZmZV0jujsFYNScsjYtNu7vMiYFZE/L9u6GvTiFieRjIeAT4WEX+vZR/19fXR0NBQyybNzN71JDVGxDrfGeKvaF3PSWoku2L+t27q8g/p3veGwI9qncTNzKy2nMjbqbuvxtPHvbqzv5Hd2Z+ZmXWOv2vdzMyswJzIzczMCsyJ3MzMrMCcyM3MzArMidzMzKzAnMjNzMwKzInczMyswPw5cut2TYuXUTdheqfaWHTuETWKxsys2HxFbmZmVmC+Iq9AUjPZnN4lU4F9gB2ATYEBwNNp22CyqVu3IpvlbXEqPyoiFrWzP5FNMPO1iHigE/FPAf4QEde3Y5+JwPKI+Jmk/wDui4g7OhqDmZl1LSfyyt6IiKEtbZA0EhgfEZ8qKz8JqI+Ir3WmP0mHkk0F+/FqdpTUu4q5yttsI78eEWd3pj0zM+t6Hlpff20O/ANAmfMkzZPUJGl0Kh8paYakacBjqd5FkhZIugN4X6kxScMl3SupUdKtkgal8nskXSCpgbIpXSVNkXRsWl4k6YeSZqYYdknlm0iaLOkRSbMkHdkdJ8fMzDK+Iq9sozTPdsk5EXFtN/TXDxhENt85wDHAUGAPsjnfH5V0X9o2DNgtIp6WdAywM7ArMJBsXvHJkvoAvwSOjIgX0xuBHwOnpDY2LE2Nl4bWW7M0IoZJ+ldgPPAl4Czgrog4Jc2a9oikOyJiRX5HSeOAcQC9Nh/Q7hNjZmYtcyKvrNWh9a7uT9J+wJWSdgP2B66JiGZgiaR7gb2AV4FHIqJ0n/6AXL3nJN2
VyncGdgNulwTQC3g+12+1b05uTP82kr25ADgE+Iyk8Wm9H/BPwOP5HSNiEjAJoO+gHaPK/szMrA1O5N1I0j7Ar9Pq2RExrbW6EfGgpG3IHqirZEUb2yF7eG5+ROzXiTYAVqV/m3nnd0fAZyNiQZVtmJlZDfkeeTeKiIcjYmh6tZrEAdI96F7AS8AMYLSkXpIGkF15P9LCbvfl6g0CDkzlC4AB6SofSX0kDa7RYd0KfF3pUl/SnjVq18zMquAr8srK75HfEhETuqk/ASdGRLOkm4D9gDlAAN+JiL+XHjjLuYnsvvpjwF+BBwEi4s300NqFkvqT/dwvIPu4XGf9KLU1V9IGZB/H+1TFPczMrGYU4duV1r3q6+ujoaGhp8MwMysUSY2lB5PzPLRuZmZWYE7kZmZmBeZEbmZmVmBO5GZmZgXmRG5mZlZgTuRmZmYF5kRuZmZWYE7kZmZmBeZEbmZmVmD+ilbrdk2Ll1E3YXrV9Rede0QXRmNmVmxtXpFLOkvSfElzJc1OM3h1OUkDJf1G0lOSGiU9KOnoLu7zBEnzJDVJmpWbmrO1+kdJ2jW3fo+kdb4+r8L+dZK+0JmYUzuLUsxzJd0m6f2dbbMDMQyVdHh392tm9l5XMZGn2bI+BQyLiN2Bg4G/dXVQaSat3wH3RcSHImI4MAbYrgv7HAV8CzgkIoYA+wLL2tjtKGDXNupUUge0K5FLam0U5cD0M2oAvle2j9KEJl1pKOBEbmbWzdr64z4IWBoRqwAiYmlEPCdpL0k3Akg6UtIbkjaU1E/SU6n8y5IelTRH0g2SNk7lUyRdKOmBdLV9bAv9fgJ4MyIuLRVExDMR8cvURp2kGZJmptdHU/nIdFV8vaQnJF2dklh9Gk2Yna5cW5op5rvA+Ih4LvW3KiL+p7VjSX1+Bjgvtfvh1M7nJD0i6UlJI9L+vSSdl9qYK+lfUt1zgRFp/9Nbq5eOa4akaWQzm1VyH/CRdI4WSLoSmAd8UNIZubZ/mNreRNL0dGzzJI1O5cMl3ZtGQ25VNi1qadThJ/ljlLQh8B9kU6jOLrVhZmZdr6175LcBZ0t6ErgDuDYi7gVmkV2BAYwgSxR7pfYeTuU35hLhfwKnAr9M2wYB+wO7ANOA68v6HQzMrBDXC8AnI2KlpB2Ba4DSkPaeaf/ngPuBj0XEn0rxSjoPuKWFNncDGlvpb51jiYhfpsT6h4i4Pm0D6B0Re6dh5h+QjWKcCiyLiL0k9QXul3QbMIHszcOn0v7jWqkHMAzYLSKernBeIBtBaUrLO5JNhfqQpEPS+t5kU6ROk3QAMAB4LiKOSDH0l9SH7Gd1ZES8mBLzj4FTUrtrHWNEHCzpbKA+Ir7WRnxmZlZDFRN5RCyXNJwsWR8IXCtpQkRMkbRQ0j+TJYafAwcAvYAZaffdUtLbAtgUuDXX9O8iYg3wmKSBbQUp6WKyxP9mROwF9AEukjQUaAZ2ylV/JCKeTfvNJhu+/lNaH02WEA9pq88ylY6l3I3p38bUN6m/3XOjD/3JkuqbZftWqvdIG0n8bknNwFzg+ynWZyLioVzbh5C9CSMdx45kP6/zJf2E7E3JDEm7kb2xuT29OekFPN/GMVaU3qSMA+i1+YBqdjEzsyq0+dR6RDQD9wD3SGoCTgSmkA3hjgJWk12tTyH7g39G2nUKcFREzJF0EjAy1+yq3LJa6HY+8NlcDKdJ2obs/i/A6cASYA+y2wMrW2m7uXSMKTlNBA5Ix9RSn8OBu1rYVulYypX6f7tvsmP8ekSs9QZAUnk7leqtqNAnZPfIl+b22aJsHwHnRMSvy3eUNIzs/vZ/SroTuAmYHxH7tdJXS8dYUURMAiYB9B20Y0u3NszMrAPaetht5zR0XTIUeCYtzyB7OOzBiHgR2BrYmWyYHWAz4Pk0TDu2nXHdBfST9NVc2ca55f7A8+mq/otkbyAqHccWZMPvJ6RYW3IO2f3u96d9NpT0pTaO5bW0rS23Al9N+yNpJ0mbtLB/a/V
q4VbgFEmbprY/IOl9krYFXo+I/wXOIxuxWAAMUPawI5L6SBrcRvvVngszM6uhtq6mNgV+mRLhW8BfSMOjZPfCB5JdmUM2pPv+iChdbf17qvNi+rfqP/IREZKOAn4h6TupjRXAmanKr4AbJJ1Adr+7ravVI4Htgf9JQ8VExNCyPm9Ow/x3KKsUwOQ2jmVqavMbQEsP7ZVcRjYEPTO1/SLZE+9zgWZJc8iu+v+7lXqdFhG3pVshD6ZzsBw4HvgI2RuYNWSjK1+NiDfT8P6FkvqT/Z5cQDZq0Zq7gQnpdsY5EXFtLeI2M7PK9E7eNesefQftGINOvKDq+v5CGDMzkNQYEet8V4m/2c263ZAP9KfBydnMrCb8XetmZmYF5kRuZmZWYE7kZmZmBeZEbmZmVmBO5GZmZgXmRG5mZlZgTuRmZmYF5kRuZmZWYP5CGOt2TYuXUTdheqvb/U1uZmbV8xW5mZlZgTmRl5EUks7PrY+XNDG3frykuZLmS5oj6TJJIyTNTq+XJT2dlu9oR78jJS1L+82VdIek93XyWBal6V/bs889kurT8s1pwhwzM1tPOZGvaxVwTEsJUNJhZHOhj4qIwWRTfj4AvBARQ9OMatOAM9L6we3se0bab3fgUeC0aneU1OnbJJLWmg42Ig6PiFc6266ZmXUdJ/J1vQVMIkvY5c4CxkfEYoCIaI6IyRGxoJYBpClMNwP+kda3kvS7dKX+kKTdU/lESVdJuh+4StLWkm5LowWXAcq1ebykR9IV/69LSVvScknnp6lU9yuLY5GkbSTVSXpc0v+ktm+TtFGq82FJt0hqlDRD0i61PBdmZlaZE3nLLgbGprm48wYDM7uw3xFpPu+/AgfzznzoPwRmpSv17wFX5vbZFTg4Io4DfgD8KY0W3AT8E0Cah3w08LE0atAMjE37bwI8HBF7RMSfKsS2I3BxavsV4LOpfBLw9YgYDownmyt+HZLGSWqQ1ND8+rJqzoWZmVXBibwFEfEqWbL8Rmt1JA1JV7cLJY2uUdelofUPApcDP03l+wNXpdjuAraWtHnaNi0i3kjLBwD/m+pNJ13RAwcBw4FH0xuFg4APpW3NwA1VxPZ0RMxOy41AnaRNgY8C16V2fw0MamnniJgUEfURUd9r4/L3R2Zm1lH++FnrLiC7+r48Vzaf7L743RHRBAyVdBGwUTUNSjoN+HJaPTwinqtQfRrVJdgV1XQNXBER321h28qIaK6ijVW55WayY94AeCVd5ZuZWQ/wFXkrIuJl4LfAqbnic4CfSdouV1ZVEk9tXlx6KK6NJA7ZVfjCtDyDNBQuaSSwNI0alLsP+EKqNwrYMpXfCRxbego+3XPfvtq4W5NieFrS51K7krRHZ9s1M7Pq+Yq8svOBr5VWIuJmSQOAP6aHxV4B5gG31qi/0j1yAcuAL6XyicBkSXOB14ETW9n/h8A1kuaTPU3/1xT3Y5K+D9wmaQNgNdkT8c/UIOaxwCWp/T7AVGBODdo1M7MqKCJ6OgZ7j6mvr4+GhoaeDsPMrFAkNUZEfXm5h9bNzMwKzInczMyswJzIzczMCsyJ3MzMrMCcyM3MzArMidzMzKzAnMjNzMwKzInczMyswJzIzczMCsxf0WrdrmnxMuomTH97fdG5R/RgNGZmxeYrcjMzswJzIm+DpJB0fm59vKSJufXjJc2VNF/SHEmXSRqR5iqfLellSU+n5Ts60P8FkhanyU5KZRMlje/0wXUBSct7OgYzs/cSJ/K2rQKOkbRN+QZJhwGnA6MiYjDZXOUPAC+Upislm1f8jLR+cHs6Tsn7aOBvwMc7dxhV9derq/swM7PaciJv21vAJLKEXe4sYHxELAaIiOaImBwRC2rU90hgPnAJcFzZtj0kPSjpz5K+DNlc5ZLukXS9pCckXS1JadtBkmZJapI0WVLfVL5I0k8kzQQ+l9bPSSMIDZKGSbpV0kJJX0n7bCrpTkkzU3tH1uh4zcysnZzIq3MxMFZS/7LywcDMLuz
3OOAa4CbgCEl9ctt2Bz4B7AecLWnbVL4n8C1gV+BDwMck9QOmAKMjYgjZQ45fzbX1UkQMi4ipaf2vaTRhRtrvWGBfsvnOAVYCR0fEMOBA4PzSG4bWSBqX3hg0NL++rF0nwczMWudEXoWIeBW4EvhGa3UkDUlXsQslje5sn5I2BA4Hfpf6fxg4NFfl/yLijYhYCtwN7J3KH4mIZyNiDTAbqAN2Bp6OiCdTnSuAA3JtXVvW/bT0bxPwcES8FhEvAqskbQEI+C9Jc4E7gA8AAysdT0RMioj6iKjvtXH5+yEzM+soJ/LqXQCcCmySK5tPdl+ciGhKV7F/BDaqpkFJp+Ueitu2bPOhwBZAk6RFwP6sPbweZfVL66tyZc1U9xHDFWXrpTbWlLW3JrU3FhgADE/HvAToV0U/ZmZWY07kVYqIl4HfkiXzknOAn0naLldWVRJPbV5ceiguIp4r23wc8KWIqIuIOmAH4JOSNk7bj5TUT9LWZPfSH63Q1QKgTtJH0voXgXurjbMF/cke6Fst6UBg+060ZWZmneAvhGmf84GvlVYi4mZJA4A/pie+XwHmAbd2ppOUrA8DvpLra4WkPwGfTkVzyYbUtwF+FBHPSdqppfYiYqWkk4HrJPUmS/qXdiLEq4HfS2oCGoAnOtGWmZl1giLKR2jNulZ9fX00NDT0dBhmZoUiqTEi6svLPbRuZmZWYB5aNzOzwli9ejXPPvssK1eu7OlQuky/fv3Ybrvt6NOnT9uVcSI3M7MCefbZZ9lss82oq6ujja+vKKSI4KWXXuLZZ59lhx12qGofD62bmVlhrFy5kq233vpdmcQBJLH11lu3a8TBidzMzArl3ZrES9p7fE7kZmZmBeZ75GZmVlh1E6bXtL1F5x5R0/a6g6/IzczMCsyJ3MzMrB1WrFjBEUccwR577MFuu+3GtddeS11dHUuXLgWgoaGBkSNHAjBx4kROPPFERowYwfbbb8+NN97Id77zHYYMGcJhhx3G6tWrOx2PE7l1u6bFnsbUzIrrlltuYdttt2XOnDnMmzePww47rGL9hQsXctdddzFt2jSOP/54DjzwQJqamthoo42YPr3ztwacyM3MzNphyJAh3H777Zx55pnMmDGD/v0rT808atQo+vTpw5AhQ2hubn478Q8ZMoRFixZ1Oh4/7NYOkprJ5ugumQrsQzYz2aZkU3s+nbYNJpvmdCuyGdEWp/KjImJRO/o8CrgJ+OeIeCKVjQTGR8SnOnYkXUfSPWSx+cvUzexdaaeddmLmzJncfPPNfP/73+eggw6id+/erFmzBmCdz4D37dsXgA022IA+ffq8/fGyDTbYgLfeeqvT8TiRt88baf7tdbSWXCWdBNRHxNda2K0axwF/Sv/+oINtVEVS74jo/G+Vmdm72HPPPcdWW23F8ccfzxZbbMFll11GXV0djY2NjBo1ihtuuKFb43EiX49J2hTYHzgQ+D1rJ/LNJU0HPkI2nem/RsQaScuB/wY+BbwBHBkRSyTVAZPJpj19ETg5Iv4qaQqwEtgTuF/SVmm/PYH3AacAJwD7AQ9HxEkptkuAvchGG66PiC59k2Fm1pKe+LhYU1MTZ5xxxttX2JdccglvvPEGp556Kv/+7//+9oNu3cWJvH02kjQ7t35ORFzbhf0dCdwSEU9KeknS8IhoTNv2BnYFngFuAY4Brgc2AR6KiLMk/RT4MvCfwC+BKyLiCkmnABcCR6W2tgM+GhHNKbFvSZa4PwNMAz4GfAl4VNLQiJgNnBURL6d52O+UtHtEzG3tQCSNA8YB9Np8QC3OjZlZjzj00EM59NBD1yl/8skn1ymbOHHiWuvLly9vdVtH+WG39nkjIobmXl2ZxCEbTp+alqem9ZJHIuKpiGgGriG7cgd4E/hDWm4E6tLyfsBv0vJVufoA16V2Sn4f2UT1TcCSiGiKiDVk9/xL7X1e0kxgFtnzALtWOpCImBQR9RFR32vjyg+GmJlZ9XxF3oMk7QP8Oq2eHRH
Tctu2Aj4BDJEUQC8gJJ2RqkRZc6X11SkJAzRT3c94Rdn6qvTvmtxyab23pB2A8cBeEfGPdBXfr4p+zMysxnxF3oMi4uHc1f20ss3HAldFxPYRURcRHyR7In5E2r63pB0kbQCMJnsgrpIHgDFpeSwwoxOhb06W/JdJGgiM6kRbZmbt8s61yrtTe4/Pibx9NpI0O/c6twv7Oo7sY2d5N/DO8PqjwEXA42QJvrxuua8DJ0uaC3wR+GZHA4uIOWRD6k+QDdff39G2zMzao1+/frz00kvv2mRemo+8X7/qBzn1bj0Ztv6qr6+PhgZ/zNzM2m/16tU8++yz7Zqvu2j69evHdtttR58+fdYql9QYEfXl9X2P3MzMCqNPnz7ssMMOPR3GesVD62ZmZgXmRG5mZlZgTuRmZmYF5ofdrNtJeg1Y0NNxlNkGWNrTQbRgfYzLMVVvfYxrfYwJ1s+41reYto+Idb4a0w+7WU9Y0NKTlz1JUsP6FhOsn3E5puqtj3GtjzHB+hnX+hhTSzy0bmZmVmBO5GZmZgXmRG49YVJPB9CC9TEmWD/jckzVWx/jWh9jgvUzrvUxpnX4YTczM7MC8xW5mZlZgTmRm5mZFZgTuXUbSYdJWiDpL5ImdHFfH5R0t6THJM2X9M1UPlHS4twMdofn9vluim2BpEO7Km5JiyQ1pf4bUtlWkm6X9Of075apXJIuTH3PlTQs186Jqf6fJZ3YiXh2LpvV71VJ3+qJcyVpsqQXJM3LldXs3Egans79X9K+6mBM50l6IvV7k6QtUnmdpDdy5+zStvpu7fg6EFPNfl7Kpkh+OJVfK2nDtmKqENe1uZgWSZrdzeeqtb8FPfp7VVMR4ZdfXf4CegELgQ8BGwJzgF27sL9BwLC0vBnwJLArMBEY30L9XVNMfYEdUqy9uiJuYBGwTVnZT4EJaXkC8JO0fDjwR0DAvsDDqXwr4Kn075Zpecsa/Zz+DmzfE+cKOAAYBszrinMDPJLqKu07qoMxHQL0Tss/ycVUl69X1k6Lfbd2fB2IqWY/L+C3wJi0fCnw1Y7+/Mq2nw+c3c3nqrW/BT36e1XLl6/IrbvsDfwlIp6KiDeBqcCRXdVZRDwfETPT8mtk87Z/oMIuRwJTI2JVRDwN/CXF3F1xHwlckZavAI7KlV8ZmYeALSQNAg4Fbo+IlyPiH8DtwGE1iOMgYGFEPNNGrF1yriLiPuDlFvrr9LlJ2zaPiIci++t7Za6tdsUUEbdFxFtp9SFgu0pttNF3a8fXrpgqaNfPK11NfgK4vj0xtRVXavfzwDWV2uiCc9Xa34Ie/b2qJSdy6y4fAP6WW3+Wyom1ZiTVAXsCD6eir6Uhs8m5obnW4uuKuAO4TVKjpHGpbGBEPJ+W/w4M7IG4AMaw9h/anj5XULtz84G0XOv4TiG7CivZQdIsSfdKGpGLtbW+Wzu+jqjFz2tr4JXcG5VanacRwJKI+HOurFvPVdnfgvX996pqTuT2riZpU+AG4FsR8SpwCfBhYCjwPNlQX3fbPyKGAaOA0yQdkN+Y3tV3++dC033QzwDXpaL14VytpafOTWsknQW8BVydip4H/iki9gS+DfxG0ubVttfJ41vvfl5ljmPtN4ndeq5a+FvQ4bbWN07k1l0WAx/MrW+XyrqMpD5k/3GvjogbASJiSUQ0R8Qa4H/IhhcrxVfzuCNicfr3BeCmFMOSNERXGlp8obvjIntjMTMilqT4evxcJbU6N4tZewi8U/FJOgn4FDA2JQLS8PVLabmR7B70Tm303drxtUsNf14vkQ0n9y4r77DU1jHAtbl4u+1ctfS3oEJbPfp71RFO5NZdHgV2TE/Dbkg2hDutqzpL9+P+H/B4RPw8Vz4oV+1ooPR07TRgjKS+knYAdiR7gKWmcUvaRNJmpWWyh6bmpTZLT8GeCPxfLq4T0pO0+wLL0nDgrcAhkrZMQ6iHpLLOWOuKqafPVU5Nzk3a9qqkfdPvxwm5ttpF0mHAd4DPRMTrufI
Bknql5Q+RnZun2ui7teNrb0w1+XmlNyV3A8d2Nqacg4EnIuLtIejuOlet/S2o0FaP/V51WHuejPPLr868yJ4GfZLsnfdZXdzX/mRDZXOB2el1OHAV0JTKpwGDcvuclWJbQO6p01rGTfaE8Jz0ml9qj+y+5J3An4E7gK1SuYCLU99NQH2urVPIHlz6C3ByJ+PahOxKrH+urNvPFdkbieeB1WT3Gk+t5bkB6skS3ELgItK3W3Ygpr+Q3S8t/W5dmup+Nv1cZwMzgU+31Xdrx9eBmGr280q/p4+k47wO6NvRn18qnwJ8paxud52r1v4W9OjvVS1f/opWMzOzAvPQupmZWYE5kZuZmRWYE7mZmVmBOZGbmZkVmBO5mZlZgTmRm5mZFZgTuZmZWYH9f+MJU2sei/e1AAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "code", "source": [ "len(train_df.drop(columns=['StudyInstanceUID','PatientID']).agg(['sum']).T)\n", "# num of classes" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:47:07.675004Z", "iopub.execute_input": "2022-10-28T02:47:07.675678Z", "iopub.status.idle": "2022-10-28T02:47:07.695859Z", "shell.execute_reply.started": "2022-10-28T02:47:07.675642Z", "shell.execute_reply": "2022-10-28T02:47:07.694954Z" }, "trusted": true, "id": "gdTMPjw_1Cjz", "outputId": "85e76a14-a8c8-4846-8db8-6cc7ba628017" }, "execution_count": null, "outputs": [ { "execution_count": 6, "output_type": "execute_result", "data": { "text/plain": "11" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "import torch\n", "import cv2\n", "import numpy as np\n", "from torchvision import transforms\n", "from torch.utils.data import Dataset" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T02:47:09.612626Z", "iopub.execute_input": "2022-10-28T02:47:09.613355Z", "iopub.status.idle": "2022-10-28T02:47:11.485435Z", "shell.execute_reply.started": "2022-10-28T02:47:09.613293Z", "shell.execute_reply": "2022-10-28T02:47:11.484503Z" }, "trusted": true, "id": "0AMhReRR1Cj0" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "class ImageDataset(Dataset):\n", " def __init__(self, csv, train, test):\n", " self.csv = csv\n", " self.train = train\n", " self.test = test\n", " self.all_image_names = self.csv[:]['StudyInstanceUID']\n", " self.all_labels = 
class ImageDataset(Dataset):
    """Image/label dataset carved out of a single labels DataFrame by row position.

    The rows of ``csv`` are split positionally (no shuffling happens here):

    * ``train=True``                -> the first ``int(0.85 * len(csv))`` rows
    * ``train=False, test=False``   -> the remaining rows except the last 10
    * ``train=False, test=True``    -> the last 10 rows

    Every column other than ``StudyInstanceUID`` and ``PatientID`` is treated
    as a label column.

    Args:
        csv: DataFrame with a ``StudyInstanceUID`` column, a ``PatientID``
            column and the label columns.
        train: select the training split (takes precedence over ``test``).
        test: with ``train=False``, select the 10-row test split.

    Raises:
        ValueError: if ``train``/``test`` match none of the three splits
            (e.g. ``train=None``).
    """

    # Folder the JPEGs are read from; each file is "<StudyInstanceUID>.jpg".
    IMAGE_DIR = "../input/ranzcr-clip-catheter-line-classification/train"
    # Share of rows (from the top of the frame) used for training.
    TRAIN_FRACTION = 0.85
    # Number of trailing rows held back as the test split.
    NUM_TEST_IMAGES = 10

    def __init__(self, csv, train, test):
        self.csv = csv
        self.train = train
        self.test = test
        self.all_image_names = self.csv[:]['StudyInstanceUID']
        self.all_labels = np.array(self.csv.drop(['StudyInstanceUID', 'PatientID'], axis=1))
        self.train_ratio = int(self.TRAIN_FRACTION * len(self.csv))
        self.valid_ratio = len(self.csv) - self.train_ratio
        n_test = self.NUM_TEST_IMAGES

        if self.train == True:
            # training split: leading rows, with augmentation
            print(f"Number of training images: {self.train_ratio}")
            self.image_names = list(self.all_image_names[:self.train_ratio])
            self.labels = list(self.all_labels[:self.train_ratio])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(degrees=45),
                transforms.ToTensor(),
            ])
        elif self.train == False and self.test == False:
            # validation split: trailing rows minus the test rows.
            # Names and labels use the SAME slice so both lists have equal
            # length, and the printed count is the number actually served.
            self.image_names = list(self.all_image_names[-self.valid_ratio:-n_test])
            self.labels = list(self.all_labels[-self.valid_ratio:-n_test])
            print(f"Number of validation images: {len(self.image_names)}")
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.ToTensor(),
            ])
        elif self.test == True and self.train == False:
            # test split: only the last rows, meant for a separate inference script.
            # NOTE(review): no Resize here, so samples keep their native size and
            # cannot be stacked into batches larger than 1 - confirm this is intended.
            self.image_names = list(self.all_image_names[-n_test:])
            self.labels = list(self.all_labels[-n_test:])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
            ])
        else:
            # Without this the instance would be left without image_names /
            # labels / transform and fail later with an AttributeError.
            raise ValueError(
                f"unsupported split selection: train={train!r}, test={test!r}"
            )

    def __len__(self):
        """Number of images in the selected split."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load, transform and return one sample.

        Returns:
            dict with ``'image'`` (float32 tensor produced by the split's
            transform) and ``'label'`` (float32 tensor of the row's labels).

        Raises:
            FileNotFoundError: if OpenCV could not read the image file.
        """
        path = f"{self.IMAGE_DIR}/{self.image_names[index]}.jpg"
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read image: {path}")
        # convert the image from BGR to RGB color format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # apply image transforms
        image = self.transform(image)
        targets = self.labels[index]

        return {
            # as_tensor avoids the copy-construct UserWarning that
            # torch.tensor(<Tensor>) raises on every sample.
            'image': torch.as_tensor(image, dtype=torch.float32),
            'label': torch.tensor(targets, dtype=torch.float32)
        }
'SqueezeNet', 'VGG', 'VisionTransformer', '_GoogLeNetOutputs', '_InceptionOutputs', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '_utils', 'alexnet', 'convnext', 'convnext_base', 'convnext_large', 'convnext_small', 'convnext_tiny', 'densenet', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'detection', 'efficientnet', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'feature_extraction', 'googlenet', 'inception', 'inception_v3', 'mnasnet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'mobilenetv2', 'mobilenetv3', 'optical_flow', 'quantization', 'regnet', 'regnet_x_16gf', 'regnet_x_1_6gf', 'regnet_x_32gf', 'regnet_x_3_2gf', 'regnet_x_400mf', 'regnet_x_800mf', 'regnet_x_8gf', 'regnet_y_128gf', 'regnet_y_16gf', 'regnet_y_1_6gf', 'regnet_y_32gf', 'regnet_y_3_2gf', 'regnet_y_400mf', 'regnet_y_800mf', 'regnet_y_8gf', 'resnet', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'resnext101_32x8d', 'resnext50_32x4d', 'segmentation', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'shufflenetv2', 'squeezenet', 'squeezenet1_0', 'squeezenet1_1', 'vgg', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'video', 'vision_transformer', 'vit_b_16', 'vit_b_32', 'vit_l_16', 'vit_l_32', 'wide_resnet101_2', 'wide_resnet50_2']\n", "output_type": "stream" } ] }, { "cell_type": "code", "source": [ "from torchvision import models \n", "import torch.nn as nn\n", "def model(pretrained, requires_grad):\n", " model = models.efficientnet_b7(progress=True, pretrained=pretrained)\n", " # to freeze the hidden layers\n", " if requires_grad == False:\n", " for param in model.parameters():\n", " param.requires_grad = False\n", " # to train the 
hidden layers\n", " elif requires_grad == True:\n", " for param in model.parameters():\n", " param.requires_grad = True\n", " # make the classification layer learnable\n", " # we have 11 classes in total\n", " model.classifier[1] = nn.Linear(in_features=2560, out_features=11)\n", " return model" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T04:24:13.656142Z", "iopub.execute_input": "2022-10-28T04:24:13.656782Z", "iopub.status.idle": "2022-10-28T04:24:13.663143Z", "shell.execute_reply.started": "2022-10-28T04:24:13.656746Z", "shell.execute_reply": "2022-10-28T04:24:13.662004Z" }, "trusted": true, "id": "QrLWPTo41Cj2" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from tqdm import tqdm\n", "from torchmetrics import Accuracy, AUROC, F1Score, Precision, Recall\n", "# training function\n", "def train(model, dataloader, optimizer, criterion, train_data, device):\n", " print('Training')\n", " model.train()\n", " counter = 0\n", " train_running_loss = 0.0\n", " # instantiate metrics\n", " acc = Accuracy()\n", " auc = AUROC()\n", " f1_score = F1Score()\n", " precision = Precision()\n", " recall = Recall()\n", " preds = []\n", " labels = []\n", " for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):\n", " counter += 1\n", " data, target = data['image'].to(device), data['label'].to(device)\n", " labels.append(target.cpu().numpy().argmax(axis=1))\n", " optimizer.zero_grad()\n", " outputs = model(data)\n", " # apply sigmoid activation to get all the outputs between 0 and 1\n", " outputs = torch.sigmoid(outputs)\n", " loss = criterion(outputs, target)\n", " train_running_loss += loss.item()\n", " # backpropagation\n", " loss.backward()\n", " # update optimizer parameters\n", " optimizer.step()\n", " preds.append(outputs.detach().cpu().numpy().argmax(axis=1))\n", " \n", " train_loss = train_running_loss / counter\n", " preds = torch.tensor(np.concatenate(preds))\n", " labels = 
# validation function
def validate(model, dataloader, criterion, val_data, device, threshold=0.5):
    """Evaluate the model for one pass and return ``(mean_batch_loss, accuracy)``.

    Args:
        model: Network to evaluate; it is switched to eval mode and called on
            the ``'image'`` tensor of every batch.
        dataloader: Sized iterable of dict batches carrying ``'image'`` and
            ``'label'`` tensors.
        criterion: Loss invoked as ``criterion(probabilities, target)``. It
            receives sigmoid outputs, not raw logits.
        val_data: Unused. Kept so existing positional calls keep working; it
            was only ever used to estimate the progress-bar length.
        device: Device the batch tensors are moved to.
        threshold: Probability at or above which a label counts as predicted
            positive when computing accuracy.

    Returns:
        Tuple of the loss averaged over batches and the fraction of individual
        label entries predicted correctly.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    print('Validating')
    model.eval()
    running_loss = 0.0
    steps = 0
    correct = 0
    seen = 0
    with torch.no_grad():
        # len(dataloader) is the real batch count; flooring
        # len(val_data) / batch_size under-reports it by one whenever the
        # last batch is partial.
        for batch in tqdm(dataloader, total=len(dataloader)):
            images = batch['image'].to(device)
            # assumes 'label' is a multi-hot tensor shaped like the model
            # output -- TODO confirm against the dataset class
            target = batch['label'].to(device)

            # apply sigmoid activation to get all the outputs between 0 and 1
            probs = torch.sigmoid(model(images))
            running_loss += criterion(probs, target).item()
            steps += 1
            # Score every label independently. Comparing argmax(pred) with
            # argmax(label) looks at a single label per sample and is
            # meaningless when several labels can be positive at once.
            correct += ((probs >= threshold) == (target >= 0.5)).sum().item()
            seen += target.numel()

    if steps == 0:
        raise ValueError('dataloader yielded no batches')
    return running_loss / steps, correct / seen
"shell.execute_reply": "2022-10-28T04:24:20.726191Z" }, "trusted": true, "id": "LyvS7_OW1Cj-" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import torch.optim as optim\n", "import matplotlib\n", "from torch.utils.data import DataLoader\n", "matplotlib.style.use('ggplot')\n", "# initialize the computation device\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T04:24:24.617804Z", "iopub.execute_input": "2022-10-28T04:24:24.618166Z", "iopub.status.idle": "2022-10-28T04:24:24.623867Z", "shell.execute_reply.started": "2022-10-28T04:24:24.618134Z", "shell.execute_reply": "2022-10-28T04:24:24.622628Z" }, "trusted": true, "id": "-GFaZnzr1Cj_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#intialize the model\n", "from torch.optim.lr_scheduler import ReduceLROnPlateau\n", "\n", "ENet_model = model(pretrained=True, requires_grad=False).to(device)\n", "# learning parameters\n", "lr = 0.0001\n", "epochs = 30\n", "batch_size = 32\n", "optimizer = optim.Adam(ENet_model.parameters(), lr=lr)\n", "scheduler = ReduceLROnPlateau(optimizer, 'min')\n", "criterion = nn.BCELoss()" ], "metadata": { "execution": { "iopub.status.busy": "2022-10-28T04:24:26.626925Z", "iopub.execute_input": "2022-10-28T04:24:26.630098Z", "iopub.status.idle": "2022-10-28T04:24:28.432430Z", "shell.execute_reply.started": "2022-10-28T04:24:26.630044Z", "shell.execute_reply": "2022-10-28T04:24:28.431444Z" }, "trusted": true, "id": "JzQrr2xz1Cj_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "train_data = ImageDataset(\n", " train_df, train=True, test=False\n", ")\n", "# validation dataset\n", "valid_data = ImageDataset(\n", " train_df, train=False, test=False\n", ")\n", "# train data loader\n", "train_loader = DataLoader(\n", " train_data, \n", " batch_size=batch_size,\n", " shuffle=True\n", ")\n", "# validation data 
class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    A loss strictly below the best value seen so far resets the patience
    counter. A loss more than ``min_delta`` above the best value, or a
    non-finite loss, increments it. A loss inside the ``min_delta`` band
    leaves the counter untouched.

    Args:
        patience: Number of counted bad epochs after which ``early_stop``
            returns True.
        min_delta: Tolerance above the best loss that is not counted as a
            bad epoch.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        # Bad epochs counted since the last improvement.
        self.counter = 0
        # Lowest validation loss observed so far.
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record one epoch's validation loss.

        Args:
            validation_loss: Validation loss of the epoch just finished.

        Returns:
            True when this epoch was counted as bad and the counter has
            reached ``patience``; False otherwise.
        """
        if not np.isfinite(validation_loss):
            # NaN fails both comparisons below, so a diverged run would
            # otherwise never advance the counter; -inf would be stored as
            # the "best" loss. Count any non-finite value as a bad epoch.
            self.counter += 1
            return self.counter >= self.patience
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False
        if validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            return self.counter >= self.patience
        return False
# start the training and validation
# Per-epoch histories; one entry is appended for every epoch that ran.
train_loss = []
valid_loss = []
train_acc = []
val_acc = []
early_stopper = EarlyStopper(patience=5, min_delta=0.001)
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_acc = train(
        ENet_model, train_loader, optimizer, criterion, train_data, device
    )
    valid_epoch_loss, val_epoch_acc = validate(
        ENet_model, valid_loader, criterion, valid_data, device
    )
    # Record and report the epoch before the early-stopping check, so the
    # epoch that triggers the stop is not missing from the histories.
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    train_acc.append(train_epoch_acc)
    val_acc.append(val_epoch_acc)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f'Val Loss: {valid_epoch_loss:.4f}')
    print(f'Train accuracy: {train_epoch_acc:.4f}')
    print(f'Val accuracy: {val_epoch_acc:.4f}')
    # ReduceLROnPlateau only lowers the learning rate when it is fed the
    # monitored metric; without this call the scheduler is inert.
    scheduler.step(valid_epoch_loss)
    if early_stopper.early_stop(valid_epoch_loss):
        break