{
"metadata": {
"kernelspec": {
"language": "python",
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.7.12",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat_minor": 0,
"nbformat": 4,
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"source": [
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session\n",
"\n",
"import numpy as np \n",
"import pandas as pd "
],
"metadata": {
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"execution": {
"iopub.status.busy": "2022-10-28T02:46:40.023210Z",
"iopub.execute_input": "2022-10-28T02:46:40.024111Z",
"iopub.status.idle": "2022-10-28T02:46:40.047013Z",
"shell.execute_reply.started": "2022-10-28T02:46:40.024018Z",
"shell.execute_reply": "2022-10-28T02:46:40.046119Z"
},
"trusted": true,
"id": "V06UUnQa1Cjt"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keep the dataset location in one place so both reads stay in sync if the input mount changes.\n",
"DATA_DIR = '../input/ranzcr-clip-catheter-line-classification'\n",
"\n",
"# Label table: StudyInstanceUID, one column per catheter label, and PatientID.\n",
"train_df = pd.read_csv(f'{DATA_DIR}/train.csv')\n",
"display(len(train_df))\n",
"display(train_df.head(3))\n",
"\n",
"# Annotation table: StudyInstanceUID, label, and a `data` column holding lists of number pairs\n",
"# (presumably pixel coordinates -- confirm against the competition data description).\n",
"train_annot_df = pd.read_csv(f'{DATA_DIR}/train_annotations.csv')\n",
"display(len(train_annot_df))\n",
"display(train_annot_df.head(3))"
],
"metadata": {
"execution": {
"iopub.status.busy": "2022-10-28T02:46:44.475857Z",
"iopub.execute_input": "2022-10-28T02:46:44.476564Z",
"iopub.status.idle": "2022-10-28T02:46:44.724851Z",
"shell.execute_reply.started": "2022-10-28T02:46:44.476517Z",
"shell.execute_reply": "2022-10-28T02:46:44.723861Z"
},
"trusted": true,
"id": "McQ0e-rw1Cjv",
"outputId": "c0aa2a6f-d842-4a3d-fb22-3b6ae1470b5f"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "30083"
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": " StudyInstanceUID ETT - Abnormal \\\n0 1.2.826.0.1.3680043.8.498.26697628953273228189... 0 \n1 1.2.826.0.1.3680043.8.498.46302891597398758759... 0 \n2 1.2.826.0.1.3680043.8.498.23819260719748494858... 0 \n\n ETT - Borderline ETT - Normal NGT - Abnormal NGT - Borderline \\\n0 0 0 0 0 \n1 0 1 0 0 \n2 0 0 0 0 \n\n NGT - Incompletely Imaged NGT - Normal CVC - Abnormal CVC - Borderline \\\n0 0 1 0 0 \n1 1 0 0 0 \n2 0 0 0 1 \n\n CVC - Normal Swan Ganz Catheter Present PatientID \n0 0 0 ec89415d1 \n1 1 0 bf4c6da3c \n2 0 0 3fc1c97e5 ",
"text/html": "
\n\n
\n \n \n | \n StudyInstanceUID | \n ETT - Abnormal | \n ETT - Borderline | \n ETT - Normal | \n NGT - Abnormal | \n NGT - Borderline | \n NGT - Incompletely Imaged | \n NGT - Normal | \n CVC - Abnormal | \n CVC - Borderline | \n CVC - Normal | \n Swan Ganz Catheter Present | \n PatientID | \n
\n \n \n \n 0 | \n 1.2.826.0.1.3680043.8.498.26697628953273228189... | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 1 | \n 0 | \n 0 | \n 0 | \n 0 | \n ec89415d1 | \n
\n \n 1 | \n 1.2.826.0.1.3680043.8.498.46302891597398758759... | \n 0 | \n 0 | \n 1 | \n 0 | \n 0 | \n 1 | \n 0 | \n 0 | \n 0 | \n 1 | \n 0 | \n bf4c6da3c | \n
\n \n 2 | \n 1.2.826.0.1.3680043.8.498.23819260719748494858... | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 0 | \n 1 | \n 0 | \n 0 | \n 3fc1c97e5 | \n
\n \n
\n
"
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": "17999"
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": " StudyInstanceUID label \\\n0 1.2.826.0.1.3680043.8.498.12616281126973421762... CVC - Normal \n1 1.2.826.0.1.3680043.8.498.12616281126973421762... CVC - Normal \n2 1.2.826.0.1.3680043.8.498.72921907356394389969... CVC - Borderline \n\n data \n0 [[1487, 1279], [1477, 1168], [1472, 1052], [14... \n1 [[1328, 7], [1347, 101], [1383, 193], [1400, 2... \n2 [[801, 1207], [812, 1112], [823, 1023], [842, ... ",
"text/html": "\n\n
\n \n \n | \n StudyInstanceUID | \n label | \n data | \n
\n \n \n \n 0 | \n 1.2.826.0.1.3680043.8.498.12616281126973421762... | \n CVC - Normal | \n [[1487, 1279], [1477, 1168], [1472, 1052], [14... | \n
\n \n 1 | \n 1.2.826.0.1.3680043.8.498.12616281126973421762... | \n CVC - Normal | \n [[1328, 7], [1347, 101], [1383, 193], [1400, 2... | \n
\n \n 2 | \n 1.2.826.0.1.3680043.8.498.72921907356394389969... | \n CVC - Borderline | \n [[801, 1207], [812, 1112], [823, 1023], [842, ... | \n
\n \n
\n
"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"# Total of each label column; the two identifier columns are dropped first because they are not labels.\n",
"(\n",
"    train_df\n",
"    .drop(columns=['StudyInstanceUID', 'PatientID'])\n",
"    .agg(['sum'])\n",
")\n",
"# Takeaway: the label totals differ widely, so the dataset is unbalanced."
],
"metadata": {
"execution": {
"iopub.status.busy": "2022-10-28T02:46:59.167135Z",
"iopub.execute_input": "2022-10-28T02:46:59.167596Z",
"iopub.status.idle": "2022-10-28T02:46:59.208167Z",
"shell.execute_reply.started": "2022-10-28T02:46:59.167559Z",
"shell.execute_reply": "2022-10-28T02:46:59.207260Z"
},
"trusted": true,
"id": "NdNcRIOP1Cjx",
"outputId": "e602d509-882f-4f7b-a237-eb0f5a2af756"
},
"execution_count": null,
"outputs": [
{
"execution_count": 3,
"output_type": "execute_result",
"data": {
"text/plain": " ETT - Abnormal ETT - Borderline ETT - Normal NGT - Abnormal \\\nsum 79 1138 7240 279 \n\n NGT - Borderline NGT - Incompletely Imaged NGT - Normal \\\nsum 529 2748 4797 \n\n CVC - Abnormal CVC - Borderline CVC - Normal \\\nsum 3195 8460 21324 \n\n Swan Ganz Catheter Present \nsum 830 ",
"text/html": "\n\n
\n \n \n | \n ETT - Abnormal | \n ETT - Borderline | \n ETT - Normal | \n NGT - Abnormal | \n NGT - Borderline | \n NGT - Incompletely Imaged | \n NGT - Normal | \n CVC - Abnormal | \n CVC - Borderline | \n CVC - Normal | \n Swan Ganz Catheter Present | \n
\n \n \n \n sum | \n 79 | \n 1138 | \n 7240 | \n 279 | \n 529 | \n 2748 | \n 4797 | \n 3195 | \n 8460 | \n 21324 | \n 830 | \n
\n \n
\n
"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
],
"metadata": {
"execution": {
"iopub.status.busy": "2022-10-28T02:47:01.769145Z",
"iopub.execute_input": "2022-10-28T02:47:01.769618Z",
"iopub.status.idle": "2022-10-28T02:47:02.507132Z",
"shell.execute_reply.started": "2022-10-28T02:47:01.769578Z",
"shell.execute_reply": "2022-10-28T02:47:02.506044Z"
},
"trusted": true,
"id": "cONSDexl1Cjy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# value counts\n",
"train_df.drop(columns=['StudyInstanceUID','PatientID']).agg(['sum']).T.sort_values(by='sum').plot(kind='barh')\n",
"plt.legend(loc='lower right');"
],
"metadata": {
"execution": {
"iopub.status.busy": "2022-10-28T02:47:04.180989Z",
"iopub.execute_input": "2022-10-28T02:47:04.181680Z",
"iopub.status.idle": "2022-10-28T02:47:04.493106Z",
"shell.execute_reply.started": "2022-10-28T02:47:04.181644Z",
"shell.execute_reply": "2022-10-28T02:47:04.491889Z"
},
"trusted": true,
"id": "rd4yEDyL1Cjz",
"outputId": "738805db-8fab-48fe-81de-ece60953337c"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "