{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# NOTE: this cell is not idempotent -- every re-run moves the working\n",
    "# directory up one more level, so run it exactly once per kernel session.\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\mlops projects\\\\text-summarization'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "\n",
    "@dataclass(frozen=True)\n",
    "class DataIngestionConfig:\n",
    "    \"\"\"Immutable settings for the data-ingestion stage.\n",
    "\n",
    "    Instances are built by ``ConfigurationManager.get_data_ingestion_config``\n",
    "    from the ``data_ingestion`` section of the loaded configuration.\n",
    "    \"\"\"\n",
    "\n",
    "    root_dir: Path  # directory created for this stage\n",
    "    source_URL: str  # URL the archive is downloaded from\n",
    "    local_data_file: Path  # where the downloaded archive is stored\n",
    "    unzip_dir: Path  # directory the archive is extracted into"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit names instead of a wildcard import, so it is obvious where the\n",
    "# two path constants come from and nothing else leaks into the namespace.\n",
    "from textsummarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH\n",
    "from textsummarizer.utils.common import read_yaml, create_directories\n",
    "\n",
    "\n",
    "class ConfigurationManager:\n",
    "    \"\"\"Loads the config and params files and builds per-stage settings.\"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        config_filepath=CONFIG_FILE_PATH,\n",
    "        params_filepath=PARAMS_FILE_PATH,\n",
    "    ):\n",
    "        \"\"\"Read both files and create the artifacts root directory.\n",
    "\n",
    "        Args:\n",
    "            config_filepath: Location of the main configuration file.\n",
    "            params_filepath: Location of the parameters file.\n",
    "        \"\"\"\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
    "        \"\"\"Build the data-ingestion settings from the loaded configuration.\n",
    "\n",
    "        Creates ``root_dir`` as a side effect.  The path-like values are\n",
    "        wrapped in ``Path`` so the fields really have the types annotated on\n",
    "        ``DataIngestionConfig``.\n",
    "\n",
    "        Returns:\n",
    "            DataIngestionConfig: Frozen settings for the ingestion stage.\n",
    "        \"\"\"\n",
    "        config = self.config.data_ingestion\n",
    "\n",
    "        create_directories([config.root_dir])\n",
    "\n",
    "        return DataIngestionConfig(\n",
    "            root_dir=Path(config.root_dir),\n",
    "            source_URL=config.source_URL,\n",
    "            local_data_file=Path(config.local_data_file),\n",
    "            unzip_dir=Path(config.unzip_dir),\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import urllib.request as request\n",
    "import zipfile\n",
    "from textsummarizer.logging import logger\n",
    "from textsummarizer.utils.common import get_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata":
{},
   "outputs": [],
   "source": [
    "class DataIngestion:\n",
    "    \"\"\"Downloads the source archive and unpacks it.\"\"\"\n",
    "\n",
    "    def __init__(self, config: DataIngestionConfig):\n",
    "        \"\"\"Keep the ingestion settings that the other methods read.\"\"\"\n",
    "        self.config = config\n",
    "\n",
    "    def download_file(self):\n",
    "        \"\"\"Download the archive to ``local_data_file`` unless it already exists.\n",
    "\n",
    "        The data is first written to a ``.part`` file next to the target and\n",
    "        renamed into place only once the download has finished.  An\n",
    "        interrupted download therefore never leaves a truncated archive that\n",
    "        a later run would mistake for a complete one.\n",
    "        \"\"\"\n",
    "        target = self.config.local_data_file\n",
    "        if os.path.exists(target):\n",
    "            logger.info(f\"File already exist size {get_size(Path(target))}\")\n",
    "            return\n",
    "\n",
    "        partial = f\"{target}.part\"\n",
    "        try:\n",
    "            _, header = request.urlretrieve(\n",
    "                url=self.config.source_URL,\n",
    "                filename=partial,\n",
    "            )\n",
    "            # The partial file sits beside the target, so this rename never\n",
    "            # has to cross a filesystem boundary.\n",
    "            os.replace(partial, target)\n",
    "        finally:\n",
    "            # Nothing to do after a successful rename; on failure this\n",
    "            # removes the incomplete download.\n",
    "            if os.path.exists(partial):\n",
    "                os.remove(partial)\n",
    "\n",
    "        logger.info(f'{target} download! with following info: \\n{header}')\n",
    "\n",
    "    def extract_zip_file(self):\n",
    "        \"\"\"Extract the archive at ``local_data_file`` into ``unzip_dir``.\n",
    "\n",
    "        The target directory is created first if it does not exist yet.\n",
    "\n",
    "        Returns:\n",
    "            None\n",
    "        \"\"\"\n",
    "        unzip_path = self.config.unzip_dir\n",
    "        os.makedirs(unzip_path, exist_ok=True)\n",
    "        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n",
    "            zip_ref.extractall(unzip_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-08-11 15:50:51,008: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
      "[2024-08-11 15:50:51,011: INFO: common: yaml file: params.yaml loaded successfully]\n",
      "[2024-08-11 15:50:51,012: INFO: common: created directory at: artifacts]\n",
      "[2024-08-11 15:50:51,014: INFO: common: created directory at: artifacts/data_ingestion]\n",
      "[2024-08-11 15:50:51,016: INFO: 4172299431: File already exist size ~ 7718 KB]\n"
     ]
    }
   ],
   "source": [
    "# Run the ingestion stage end to end.  Errors are left to propagate so the\n",
    "# notebook shows the original traceback.\n",
    "config = ConfigurationManager()\n",
    "data_ingestion_config = config.get_data_ingestion_config()\n",
    "data_ingestion = DataIngestion(config=data_ingestion_config)\n",
    "data_ingestion.download_file()\n",
    "data_ingestion.extract_zip_file()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
"kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }