{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Run this cell only once per kernel: it moves the working directory one level up\n",
    "# so the relative paths used below resolve; re-running it moves up again.\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\mlops projects\\\\text-summarization'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "\n",
    "@dataclass(frozen=True)\n",
    "class DataValidationConfig:\n",
    "    \"\"\"Immutable settings for the data validation stage.\"\"\"\n",
    "\n",
    "    # Directory created for this stage's outputs.\n",
    "    root_dir: Path\n",
    "    # Path of the text file the validation status is written to.\n",
    "    STATUS_FILE: str\n",
    "    # File names that must be present in the dataset directory.\n",
    "    ALL_REQUIRED_FILES: list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from textsummarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH\n",
    "from textsummarizer.utils.common import read_yaml, create_directories\n",
    "\n",
    "\n",
    "class ConfigurationManager:\n",
    "    \"\"\"Loads the project YAML files and builds per-stage config objects.\"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        config_filepath=CONFIG_FILE_PATH,\n",
    "        params_filepath=PARAMS_FILE_PATH,\n",
    "    ):\n",
    "        \"\"\"Read both YAML files and create the artifacts root directory.\"\"\"\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_data_validation_config(self) -> DataValidationConfig:\n",
    "        \"\"\"Create the validation output directory and return its settings.\"\"\"\n",
    "        config = self.config.data_validation\n",
    "\n",
    "        create_directories([config.root_dir])\n",
    "\n",
    "        return DataValidationConfig(\n",
    "            # Wrapped in Path so the value matches the dataclass annotation.\n",
    "            root_dir=Path(config.root_dir),\n",
    "            STATUS_FILE=config.STATUS_FILE,\n",
    "            ALL_REQUIRED_FILES=config.ALL_REQUIRED_FILES,\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from textsummarizer.logging import logger\n",
    "\n",
    "\n",
    "class DataValidation:\n",
    "    \"\"\"Checks that the ingested dataset contains every required file.\"\"\"\n",
    "\n",
    "    # Directory whose entries are compared against ALL_REQUIRED_FILES\n",
    "    # (relative to the current working directory).\n",
    "    DATASET_DIR = os.path.join(\"artifacts\", \"data_ingestion\", \"samsum_dataset\")\n",
    "\n",
    "    def __init__(self, config: DataValidationConfig):\n",
    "        self.config = config\n",
    "\n",
    "    def validate_all_files_exist(self) -> bool:\n",
    "        \"\"\"Verify the required files are present and record the result.\n",
    "\n",
    "        Returns:\n",
    "            True when every name in ``ALL_REQUIRED_FILES`` is found in the\n",
    "            dataset directory, otherwise False. The same value is written\n",
    "            to ``STATUS_FILE``.\n",
    "\n",
    "        Raises:\n",
    "            OSError: if the dataset directory cannot be listed or the\n",
    "                status file cannot be written.\n",
    "        \"\"\"\n",
    "        present = set(os.listdir(self.DATASET_DIR))\n",
    "\n",
    "        # Check the required names against the directory, not the other way\n",
    "        # round: extra files are harmless, missing ones are the failure.\n",
    "        missing = [\n",
    "            name for name in self.config.ALL_REQUIRED_FILES\n",
    "            if name not in present\n",
    "        ]\n",
    "        validation_status = not missing\n",
    "\n",
    "        if missing:\n",
    "            logger.info(f\"Missing required files: {missing}\")\n",
    "\n",
    "        # Write the status once, after every required file has been checked,\n",
    "        # so the file reflects the overall result rather than the last entry.\n",
    "        with open(self.config.STATUS_FILE, 'w') as f:\n",
    "            f.write(f\"Validation status: {validation_status}\")\n",
    "\n",
    "        return validation_status\n",
    "\n",
    "\n",
    "# Backward-compatible alias for the original (misspelled) class name.\n",
    "DataValiadtion = DataValidation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-08-11 16:18:48,704: INFO: common: yaml file: config\\config.yaml loaded successfully]\n",
      "[2024-08-11 16:18:48,706: INFO: common: yaml file: params.yaml loaded successfully]\n",
      "[2024-08-11 16:18:48,707: INFO: common: created directory at: artifacts]\n",
      "[2024-08-11 16:18:48,708: INFO: common: created directory at: artifacts/data_validation]\n"
     ]
    }
   ],
   "source": [
    "config = ConfigurationManager()\n",
    "data_validation_config = config.get_data_validation_config()\n",
    "data_validation = DataValidation(config=data_validation_config)\n",
    "data_validation.validate_all_files_exist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}